ver Feb19th

Updated load_charts with the legend property; updated _read_cell_style with the font_underline, font_size, and hyperlink styles; added load_filters and load_pivot_tables.
2024-02-19 12:08:36 +08:00
parent c39926fc57
commit 3fb2fd9718
1 changed files with 235 additions and 7 deletions
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -16,12 +16,15 @@ from lxml.etree import _Element
 from openpyxl import Workbook
 from openpyxl.chart._chart import ChartBase
 from openpyxl.worksheet.worksheet import Worksheet
+from openpyxl.worksheet.filters import AutoFilter, SortState
 from openpyxl.worksheet.cell_range import MultiCellRange
 from openpyxl.worksheet.dimensions import DimensionHolder
 from openpyxl.formatting.formatting import ConditionalFormattingList
 #from openpyxl.utils import get_column_letter
 from openpyxl.cell.cell import Cell
 from openpyxl.styles.differential import DifferentialStyle
+from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
+from openpyxl.pivot.cache import CacheSource as PivotCacheSource
 import formulas

 V = TypeVar("Value")
@@ -79,6 +82,7 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
 # Available Chart Properties:
 # title: str
 # anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
+# legend: "b" | "tr" | "l" | "r" | "t"
 # width: number
 # height: number
 # type: "scatterChart" | "lineChart" | "barChart"
@@ -133,7 +137,12 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
        info: Dict[str, Any] = {}

        if "title" in chart_props:
-            info["title"] = ch.title.tx.rich.p[0].r[0].t
+            try:
+                info["title"] = ch.title.tx.rich.p[0].r[0].t
+            except:
+                info["title"] = None
+        if "legend" in chart_props:
+            info["legend"] = ch.legend.position if ch.legend is not None else None
        if "anchor" in chart_props:
            info["anchor"] = [ch.anchor.editAs
                , ch.anchor._from.col, ch.anchor.to.row
@@ -149,15 +158,98 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
            info["direction"] = ch.barDir

        if "xtitle" in chart_props:
-            info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t
+            try:
+                info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t
+            except:
+                info["xtitle"] = None
        if "ytitle" in chart_props:
-            info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t
+            try:
+                info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t
+            except:
+                info["ytitle"] = None
        if "ztitle" in chart_props:
-            info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
+            try:
+                info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
+            except:
+                info["ztitle"] = None
        chart_set[series] = info
    return chart_set
    #  }}} function load_charts # 

+# Available Pivot Properties:
+# name: str
+# show_total, show_empty_row, show_empty_col, show_headers: bool
+# location: str
+# selection: if the concrete item selection should be checked, a list of list of tuple like (bool, index) will be returned
+# filter: if the filter fields should be checked; the field indices are returned in the `filter_fields` item
+# col_fields: indices
+# row_fields: indices
+# data_fields: list of str representations. the str representation is like "index;name;subtotal_type;show_data_as"; name is optional and is only returned when `data_fields_name` is specified in `pivot_props`
+def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
+    #  function load_pivot_tables {{{ # 
+    """
+    Args:
+        xlsx_file (Workbook): concerned excel book
+        sheet_name (str): sheet name
+        options (Dict[str, List[str]]): dict like {"pivot_props": list of str}
+          giving the concerned pivot properties
+
+    Returns:
+        Dict[str, Any]: information of pivot tables, dict like
+          {
+            <str representing data source>: {
+                <str as property>: anything
+            }
+          }
+    """
+
+    worksheet: Worksheet = xlsx_file[sheet_name]
+    pivots: List[PivotTableDefinition] = worksheet._pivots  # read from the worksheet's private `_pivots` attribute
+
+    pivot_set: Dict[str, Any] = {}  # result: one entry of requested properties per pivot table
+    pivot_props: Set[str] = set(options.get("pivot_props", []))  # only the properties named here are extracted
+    for pvt in pivots:
+        name: str = pvt.name
+        cache_source: PivotCacheSource = pvt.cache.cacheSource
+        source: str = "{:};{:};{:}".format(cache_source.type, cache_source.worksheetSource.ref, cache_source.worksheetSource.sheet)  # result key: "<type>;<ref>;<sheet>"
+
+        info: Dict[str, Any] = {}
+        # NOTE: the source string is not stored in `info`; it is used as the key of `pivot_set` below.
+        if "name" in pivot_props:
+            info["name"] = name
+
+        if "show_total" in pivot_props:
+            info["show_total"] = pvt.visualTotals  # NOTE(review): confirm visualTotals is the intended attribute for "show_total"
+        if "show_empty_row" in pivot_props:
+            info["show_empty_row"] = pvt.showEmptyRow
+        if "show_empty_col" in pivot_props:
+            info["show_empty_col"] = pvt.showEmptyCol
+        if "show_headers" in pivot_props:
+            info["show_headers"] = pvt.showHeaders
+
+        if "location" in pivot_props:
+            info["location"] = pvt.location
+        if "filter" in pivot_props or "selection" in pivot_props:  # requesting "filter" also collects the item selection
+            info["selection"] = [ [(itm.h, itm.x) for itm in f.items]\
+                                  for f in pvt.pivotFields
+                                ]
+        if "filter" in pivot_props:
+            info["filter_fields"] = set(f.fld for f in pvt.pageFields)  # `fld` of every page field, as an unordered set
+        if "col_fields" in pivot_props:
+            info["col_fields"] = [f.x for f in pvt.colFields]
+        if "row_fields" in pivot_props:
+            info["row_fields"] = [f.x for f in pvt.rowFields]
+        if "data_fields" in pivot_props:
+            info["data_fields"] = [ "{:d};{:};{:};{:}".format( f.fld, f.name if "data_fields_name" in pivot_props else ""  # name slot stays empty unless "data_fields_name" is requested
+                                                             , f.subtotal, f.showDataAs
+                                                             )\
+                                    for f in pvt.dataFields
+                                  ]
+
+        pivot_set[source] = info  # pivots that share the same source string overwrite each other here
+    return pivot_set
+    #  }}} function load_pivot_tables # 
+
 _shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
 def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
    #  read_cell_value {{{ # 
@@ -210,9 +302,12 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
 # font_color - in aRGB, e.g., FF000000 is black
 # font_bold - bool
 # font_italic - bool
+# font_underline - "single" | "double" | "singleAccounting" | "doubleAccounting"
+# font_size - float
 # fill_type - "patternFill" | "gradientFill"
 # bgcolor - in aRGB, e.g., FFFF0000 is red
 # fgcolor - in aRGB, e.g., FFFFFF00 is yellow
+# hyperlink - str
 def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
    if style_name=="number_format":
        return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode)\
@@ -227,12 +322,18 @@ def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[Different
        return (diff_style or cell).font.bold if cell.value is not None else None
    elif style_name=="font_italic":
        return (diff_style or cell).font.italic if cell.value is not None else None
+    elif style_name=="font_underline":
+        return (diff_style or cell).font.underline if cell.value is not None else None
+    elif style_name=="font_size":
+        return (diff_style or cell).font.size if cell.value is not None else None
    elif style_name=="fill_type":
        return (diff_style or cell).fill.tagname
    elif style_name=="bgcolor":
        return (diff_style or cell).fill.bgColor.rgb
    elif style_name=="fgcolor":
        return (diff_style or cell).fill.fgColor.rgb
+    elif style_name=="hyperlink":
+        return cell.hyperlink or "" if cell.value is not None else None
    else:
        raise NotImplementedError("Unsupported Style: {:}".format(style_name))

@@ -328,6 +429,61 @@ def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
    return obj_set
    #  }}} function load_rows_or_cols # 

+def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:  # NOTE: `options` is accepted but never read in this function
+    #  function load_filters {{{ # 
+    worksheet: Worksheet = xlsx_file[sheet_name]
+
+    filters: AutoFilter = worksheet.auto_filter
+    filter_dict: Dict[str, Any] = {}  # result keys: "ref", "filter_column" and, when a sort state exists, "sort_state"
+    filter_dict["ref"] = filters.ref
+
+    # filterColumn: one dict per filter column; the filters_* / custom_filters* keys are only set when that kind of filter is present
+    filter_column_set: List[Dict[str, Any]] = []
+    for flt_clm in filters.filterColumn:
+        filter_column: Dict[str, Any] = {}
+        filter_column["col_id"] = flt_clm.colId
+        filter_column["hidden_button"] = flt_clm.hiddenButton
+        filter_column["show_button"] = flt_clm.showButton
+        if flt_clm.filters is not None:
+            filter_column["filters_blank"] = flt_clm.filters.blank
+            filter_column["filters"] = set(flt_clm.filters.filter)  # stored as a set, so the order of the values is not kept
+        if flt_clm.customFilters is not None:
+            filter_column["custom_filters_op"] = flt_clm.customFilters._and  # presumably whether the conditions are AND-ed; confirm against openpyxl
+            filter_column["custom_filters"] = set( ( flt.operator  # set of (operator, val) pairs
+                                                   , flt.val
+                                                   )\
+                                                   for flt in flt_clm.customFilters.customFilter
+                                                 )
+        filter_column_set.append(filter_column)
+    filter_column_set = list( sorted( filter_column_set  # order the column dicts by ascending col_id
+                                    , key=(lambda d: d["col_id"])
+                                    )
+                            )
+    filter_dict["filter_column"] = filter_column_set
+
+    # sortState: exported only when the auto filter carries one
+    sort_state: Optional[SortState] = filters.sortState
+    if sort_state is not None:
+        sort_state_dict: Dict[str, Any] = {}
+        sort_state_dict["sort"] = sort_state.columnSort
+        sort_state_dict["case"] = sort_state.caseSensitive
+        sort_state_dict["method"] = sort_state.sortMethod
+        sort_state_dict["ref"] = sort_state.ref
+        sort_state_dict["condition"] = list( { "descending": cdt.descending  # one dict per sort condition, in sortCondition order
+                                             , "key": cdt.sortBy
+                                             , "ref": cdt.ref
+                                             , "custom_list": cdt.customList
+                                             , "dxf_id": cdt.dxfId
+                                             , "icon": cdt.iconSet
+                                             , "iconid": cdt.iconId
+                                             }\
+                                             for cdt in sort_state.sortCondition
+                                           )
+        filter_dict["sort_state"] = sort_state_dict
+
+    return filter_dict
+    #  }}} function load_filters # 
+
 def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:  # True when every key/value pair of `pattern` is also present in `item`
    return all(k in item and item[k] == val for k, val in pattern.items())  # an empty pattern matches any item

@@ -431,10 +587,12 @@ def compare_urls(url1, url2):


 if __name__ == "__main__":
-    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
+    path1 = "test.xlsx"
+    #path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
+    path1 = "../../任务集/SheetCopilot/dataset/task_sheet_answers_v2/BoomerangSales/2_BoomerangSales/2_BoomerangSales_gt1.xlsx"
    workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    worksheet1: Worksheet = workbook1.active
-    charts: List[ChartBase] = worksheet1._charts
+    #charts: List[ChartBase] = worksheet1._charts
    # print(len(charts))
    # print(type(charts[0]))
    #
@@ -466,4 +624,74 @@ if __name__ == "__main__":

    # df1 = pd.read_excel(path1)
    # print(df1)
-    print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
+    #print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
+    #print(type(worksheet1["A1"].hyperlink))
+    #print(worksheet1["A1"].hyperlink)
+    #print(worksheet1._charts[0].legend)
+            #print(worksheet1._charts[0].legend.position)
+    #for entr in worksheet1._charts[0].legend.legendEntry:
+            #print("Entr", entr.txPr.p[0].r[0].t)
+    #print(load_filters(workbook1, "工作表1"))
+    #print(worksheet1.auto_filter)
+    #for pvt in worksheet1._pivots:
+        ##print(type(pvt))
+        ##print(pvt)
+        #print(type(pvt.cache))
+                      ##print(pvt.cache)
+        #print(pvt.cache.cacheSource.type)
+        #print(pvt.cache.cacheSource.worksheetSource.ref)
+        #print(pvt.cache.cacheSource.worksheetSource.sheet)
+#
+        #print(type(pvt.location))
+        #print(pvt.location)
+        #for f in pvt.pivotFields:
+            #print(type(f))
+            #print([(itm.h, itm.x) for itm in f.items])
+                                       ##for f_itm in f.items:
+                ##print(f_itm.n)
+                ##print(f_itm.t)
+                ##print(f_itm.h)
+           ##print(f_itm.s)
+           ##print(f_itm.sd)
+           ##print(f_itm.f)
+           ##print(f_itm.m)
+                                    ##print(f_itm.c)
+                                    ##print(f_itm.x)
+                                            ##print(f_itm.d)
+                        ##print(f_itm.e)
+           ##print(f.countASubtotal)
+            ##print(f.countSubtotal)
+            ##for f in pvt.dataFields:
+                       ##print(f.name)
+                       ##print(f.fld)
+                                    ###print(f.baseField)
+                                    ##print(f.subtotal)
+                       ##print(f.showDataAs)
+                  ##for f in pvt.rowFields:
+                  ##print(1, f.x)
+           ##for f in pvt.rowItems:
+                    ##print(2, f.t, f.r, f.i, f.x)
+           ##for f in pvt.colFields:
+           ##print(3, f.x)
+                  ##for f in pvt.colItems:
+        ##print(4, f.t, f.r, f.i, f.x)
+        #for f in pvt.pageFields:
+            #print(5, f.fld)
+        #for flt in pvt.filters:
+        #print(5, flt.fld)
+                                            #print(6, flt.mpFld)
+                              #print(7, flt.type)
+                              #print(8, flt.evalOrder)
+                              #print(9, flt.id)
+                                            #print(10, flt.stringValue1)
+                              #print(11, flt.stringValue2)
+   #print(load_charts(workbook1, "Sheet2", chart_props=["title", "type", "legend"]))
+    #print(load_filters(workbook1, "透视表_工作表1_1"))
+    #workbook1.save("test2.xlsx")
+    print( load_pivot_tables( workbook1, "Sheet2", pivot_props=[ "col_fields"
+                                                               , "filter"
+                                                               , "row_fields"
+                                                               , "data_fields"
+                                                               ]
+                            )
+         )