Code clean
This commit is contained in:
@@ -284,18 +284,6 @@ def compare_contains_image(docx_file1, docx_file2):
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
# file1 = 'path/to/file1.docx'
|
|
||||||
# file2 = 'path/to/file2.docx'
|
|
||||||
|
|
||||||
# print(are_docx_files_same(file1, file2))
|
|
||||||
# Replace 'your_document.docx' with the path to your document
|
|
||||||
# result = contains_page_break('your_document.docx')
|
|
||||||
# print(result)
|
|
||||||
|
|
||||||
# config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
|
|
||||||
# print(find_default_font("Ani", config_path))
|
|
||||||
|
|
||||||
|
|
||||||
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
|
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
|
||||||
if not compare_docx_files(file_path1, file_path2):
|
if not compare_docx_files(file_path1, file_path2):
|
||||||
return 0
|
return 0
|
||||||
@@ -538,12 +526,3 @@ def compare_highlighted_text(file1, file2):
|
|||||||
return 1
|
return 1
|
||||||
else:
|
else:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
print(
|
|
||||||
compare_docx_files(
|
|
||||||
r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\88fe4b2d-3040-4c70-9a70-546a47764b48\CCCH9003_Tutorial_guidelines.docx",
|
|
||||||
r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\88fe4b2d-3040-4c70-9a70-546a47764b48\CCCH9003_Tutorial_guidelines_Gold.docx",
|
|
||||||
ignore_blanks=False
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -1,46 +1,47 @@
|
|||||||
|
import builtins
|
||||||
|
import functools
|
||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
|
import operator
|
||||||
|
import re
|
||||||
import zipfile
|
import zipfile
|
||||||
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
|
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
|
||||||
from typing import Dict, List, Set, Match, Tuple, Pattern
|
from typing import Dict, List, Set, Match, Tuple, Pattern
|
||||||
from urllib.parse import urlparse, urlunparse
|
from urllib.parse import urlparse, urlunparse
|
||||||
import re
|
|
||||||
import functools
|
|
||||||
import operator
|
|
||||||
import builtins
|
|
||||||
import itertools
|
|
||||||
|
|
||||||
|
import formulas
|
||||||
import lxml.cssselect
|
import lxml.cssselect
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
import openpyxl
|
|
||||||
import xmltodict
|
import xmltodict
|
||||||
from lxml.etree import _Element
|
from lxml.etree import _Element
|
||||||
from openpyxl import Workbook
|
from openpyxl import Workbook
|
||||||
|
from openpyxl.cell.cell import Cell
|
||||||
from openpyxl.chart._chart import ChartBase
|
from openpyxl.chart._chart import ChartBase
|
||||||
from openpyxl.worksheet.worksheet import Worksheet
|
from openpyxl.formatting.formatting import ConditionalFormattingList
|
||||||
from openpyxl.worksheet.filters import AutoFilter, SortState
|
from openpyxl.pivot.cache import CacheSource as PivotCacheSource
|
||||||
|
from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
|
||||||
|
from openpyxl.styles.differential import DifferentialStyle
|
||||||
|
from openpyxl.utils import coordinate_to_tuple, get_column_letter
|
||||||
from openpyxl.worksheet.cell_range import MultiCellRange, CellRange
|
from openpyxl.worksheet.cell_range import MultiCellRange, CellRange
|
||||||
from openpyxl.worksheet.dimensions import DimensionHolder
|
from openpyxl.worksheet.dimensions import DimensionHolder
|
||||||
from openpyxl.formatting.formatting import ConditionalFormattingList
|
from openpyxl.worksheet.filters import AutoFilter, SortState
|
||||||
from openpyxl.utils import coordinate_to_tuple, get_column_letter
|
from openpyxl.worksheet.worksheet import Worksheet
|
||||||
from openpyxl.cell.cell import Cell
|
|
||||||
from openpyxl.styles.differential import DifferentialStyle
|
|
||||||
from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
|
|
||||||
from openpyxl.pivot.cache import CacheSource as PivotCacheSource
|
|
||||||
import formulas
|
|
||||||
|
|
||||||
V = TypeVar("Value")
|
V = TypeVar("Value")
|
||||||
|
|
||||||
logger = logging.getLogger("desktopenv.metrics.utils")
|
logger = logging.getLogger("desktopenv.metrics.utils")
|
||||||
|
|
||||||
_xlsx_namespaces = [ ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
|
_xlsx_namespaces = [("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
|
||||||
, ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
|
, ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
|
||||||
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
|
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
|
||||||
]
|
]
|
||||||
_xlsx_ns_mapping = dict(_xlsx_namespaces)
|
_xlsx_ns_mapping = dict(_xlsx_namespaces)
|
||||||
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
|
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
|
||||||
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
|
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
|
||||||
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
|
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
|
||||||
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
|
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
|
||||||
|
|
||||||
|
|
||||||
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
|
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
|
||||||
# function load_sparklines {{{ #
|
# function load_sparklines {{{ #
|
||||||
"""
|
"""
|
||||||
@@ -174,6 +175,7 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
|
|||||||
return chart_set
|
return chart_set
|
||||||
# }}} function load_charts #
|
# }}} function load_charts #
|
||||||
|
|
||||||
|
|
||||||
# Available Pivot Properties:
|
# Available Pivot Properties:
|
||||||
# name: str
|
# name: str
|
||||||
# show_total, show_empty_row, show_empty_col, show_headers: bool
|
# show_total, show_empty_row, show_empty_col, show_headers: bool
|
||||||
@@ -210,23 +212,26 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
|
|||||||
pivot_set: Dict[str, Any] = {}
|
pivot_set: Dict[str, Any] = {}
|
||||||
pivot_props: Set[str] = set(options.get("pivot_props", []))
|
pivot_props: Set[str] = set(options.get("pivot_props", []))
|
||||||
for pvt in pivots:
|
for pvt in pivots:
|
||||||
raw_selection: List[List[tuple[Optional[bool], int]]] =\
|
raw_selection: List[List[tuple[Optional[bool], int]]] = \
|
||||||
[ [(itm.h, itm.x) for itm in f.items if itm.x is not None]\
|
[[(itm.h, itm.x) for itm in f.items if itm.x is not None] \
|
||||||
for f in pvt.pivotFields
|
for f in pvt.pivotFields
|
||||||
]
|
]
|
||||||
raw__selection: List[List[tuple[Optional[bool], int]]] = list(itertools.dropwhile(lambda r: len(r)==0, raw_selection))
|
raw__selection: List[List[tuple[Optional[bool], int]]] = list(
|
||||||
left_bias = len(raw_selection)-len(raw__selection)
|
itertools.dropwhile(lambda r: len(r) == 0, raw_selection))
|
||||||
selection: List[List[tuple[Optional[bool], int]]] = list((itertools.dropwhile(lambda r: len(r)==0, reversed(raw__selection))))[::-1]
|
left_bias = len(raw_selection) - len(raw__selection)
|
||||||
right_bias = len(raw__selection)-len(selection)
|
selection: List[List[tuple[Optional[bool], int]]] = list(
|
||||||
|
(itertools.dropwhile(lambda r: len(r) == 0, reversed(raw__selection))))[::-1]
|
||||||
|
right_bias = len(raw__selection) - len(selection)
|
||||||
cache_source: PivotCacheSource = pvt.cache.cacheSource
|
cache_source: PivotCacheSource = pvt.cache.cacheSource
|
||||||
cell_range1: str
|
cell_range1: str
|
||||||
cell_range2: str
|
cell_range2: str
|
||||||
cell_range1, cell_range2 = cache_source.worksheetSource.ref.split(":")
|
cell_range1, cell_range2 = cache_source.worksheetSource.ref.split(":")
|
||||||
cell_range1: Tuple[int, int] = coordinate_to_tuple(cell_range1)
|
cell_range1: Tuple[int, int] = coordinate_to_tuple(cell_range1)
|
||||||
cell_range1 = (cell_range1[0], cell_range1[1]+left_bias)
|
cell_range1 = (cell_range1[0], cell_range1[1] + left_bias)
|
||||||
cell_range2: Tuple[int, int] = coordinate_to_tuple(cell_range2)
|
cell_range2: Tuple[int, int] = coordinate_to_tuple(cell_range2)
|
||||||
cell_range2 = (cell_range2[0], cell_range2[1]-right_bias)
|
cell_range2 = (cell_range2[0], cell_range2[1] - right_bias)
|
||||||
source: str = "{:};{:}:{:};{:}".format(cache_source.type, cell_range1, cell_range2, cache_source.worksheetSource.sheet)
|
source: str = "{:};{:}:{:};{:}".format(cache_source.type, cell_range1, cell_range2,
|
||||||
|
cache_source.worksheetSource.sheet)
|
||||||
|
|
||||||
info: Dict[str, Any] = {}
|
info: Dict[str, Any] = {}
|
||||||
if "name" in pivot_props:
|
if "name" in pivot_props:
|
||||||
@@ -248,22 +253,26 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
|
|||||||
if "filter" in pivot_props:
|
if "filter" in pivot_props:
|
||||||
info["filter_fields"] = set(f.fld for f in pvt.pageFields)
|
info["filter_fields"] = set(f.fld for f in pvt.pageFields)
|
||||||
if "col_fields" in pivot_props:
|
if "col_fields" in pivot_props:
|
||||||
info["col_fields"] = [f.x-left_bias for f in pvt.colFields]
|
info["col_fields"] = [f.x - left_bias for f in pvt.colFields]
|
||||||
if "row_fields" in pivot_props:
|
if "row_fields" in pivot_props:
|
||||||
info["row_fields"] = [f.x-left_bias for f in pvt.rowFields]
|
info["row_fields"] = [f.x - left_bias for f in pvt.rowFields]
|
||||||
if "data_fields" in pivot_props:
|
if "data_fields" in pivot_props:
|
||||||
info["data_fields"] = [ "{:d};{:};{:};{:}".format( f.fld-left_bias, f.name if "data_fields_name" in pivot_props else ""
|
info["data_fields"] = [
|
||||||
, f.subtotal, f.showDataAs
|
"{:d};{:};{:};{:}".format(f.fld - left_bias, f.name if "data_fields_name" in pivot_props else ""
|
||||||
)\
|
, f.subtotal, f.showDataAs
|
||||||
for f in pvt.dataFields
|
) \
|
||||||
]
|
for f in pvt.dataFields
|
||||||
|
]
|
||||||
|
|
||||||
pivot_set[source] = info
|
pivot_set[source] = info
|
||||||
logger.debug(".[%s].pivots: %s", sheet_name, repr(pivot_set))
|
logger.debug(".[%s].pivots: %s", sheet_name, repr(pivot_set))
|
||||||
return pivot_set
|
return pivot_set
|
||||||
# }}} function load_pivot_tables #
|
# }}} function load_pivot_tables #
|
||||||
|
|
||||||
|
|
||||||
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
|
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
|
||||||
|
|
||||||
|
|
||||||
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
||||||
# read_cell_value {{{ #
|
# read_cell_value {{{ #
|
||||||
try:
|
try:
|
||||||
@@ -283,20 +292,20 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
|||||||
|
|
||||||
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
|
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
|
||||||
sheet: _Element = lxml.etree.fromstring(f.read())
|
sheet: _Element = lxml.etree.fromstring(f.read())
|
||||||
cells: List[_Element] =\
|
cells: List[_Element] = \
|
||||||
lxml.cssselect.CSSSelector( 'oo|row>oo|c[r="{:}"]'.format(coordinate)
|
lxml.cssselect.CSSSelector('oo|row>oo|c[r="{:}"]'.format(coordinate)
|
||||||
, namespaces=_xlsx_ns_mapping
|
, namespaces=_xlsx_ns_mapping
|
||||||
)(sheet)
|
)(sheet)
|
||||||
if len(cells)==0:
|
if len(cells) == 0:
|
||||||
return None
|
return None
|
||||||
cell: _Element = cells[0]
|
cell: _Element = cells[0]
|
||||||
except zipfile.BadZipFile:
|
except zipfile.BadZipFile:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
cell: Dict[str, str] = xmltodict.parse( lxml.etree.tostring(cell, encoding="unicode")
|
cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")
|
||||||
, process_namespaces=True
|
, process_namespaces=True
|
||||||
, namespaces=_xlsx_ns_imapping
|
, namespaces=_xlsx_ns_imapping
|
||||||
)
|
)
|
||||||
logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
|
logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
|
||||||
if "@t" not in cell["c"]:
|
if "@t" not in cell["c"]:
|
||||||
return None
|
return None
|
||||||
@@ -308,6 +317,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
|||||||
return cell["c"]["v"]
|
return cell["c"]["v"]
|
||||||
# }}} read_cell_value #
|
# }}} read_cell_value #
|
||||||
|
|
||||||
|
|
||||||
# Supported Styles:
|
# Supported Styles:
|
||||||
# number_format
|
# number_format
|
||||||
# font_name - str
|
# font_name - str
|
||||||
@@ -322,50 +332,53 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
|||||||
# fgcolor - in aRGB, e.g., FFFFFF00 is yellow
|
# fgcolor - in aRGB, e.g., FFFFFF00 is yellow
|
||||||
# hyperlink - str
|
# hyperlink - str
|
||||||
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
|
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
|
||||||
if style_name=="number_format":
|
if style_name == "number_format":
|
||||||
return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode)\
|
return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode) \
|
||||||
if cell.value is not None and cell.data_type=="n" else None
|
if cell.value is not None and cell.data_type == "n" else None
|
||||||
elif style_name=="font_name":
|
elif style_name == "font_name":
|
||||||
return (diff_style or cell).font.name if cell.value is not None else None
|
return (diff_style or cell).font.name if cell.value is not None else None
|
||||||
elif style_name=="font_family":
|
elif style_name == "font_family":
|
||||||
return (diff_style or cell).font.family if cell.value is not None else None
|
return (diff_style or cell).font.family if cell.value is not None else None
|
||||||
elif style_name=="font_color":
|
elif style_name == "font_color":
|
||||||
return (diff_style or cell).font.color.rgb if cell.value is not None else None
|
return (diff_style or cell).font.color.rgb if cell.value is not None else None
|
||||||
elif style_name=="font_bold":
|
elif style_name == "font_bold":
|
||||||
return (diff_style or cell).font.bold if cell.value is not None else None
|
return (diff_style or cell).font.bold if cell.value is not None else None
|
||||||
elif style_name=="font_italic":
|
elif style_name == "font_italic":
|
||||||
return (diff_style or cell).font.italic if cell.value is not None else None
|
return (diff_style or cell).font.italic if cell.value is not None else None
|
||||||
elif style_name=="font_underline":
|
elif style_name == "font_underline":
|
||||||
return (diff_style or cell).font.underline if cell.value is not None else None
|
return (diff_style or cell).font.underline if cell.value is not None else None
|
||||||
elif style_name=="font_size":
|
elif style_name == "font_size":
|
||||||
return (diff_style or cell).font.size if cell.value is not None else None
|
return (diff_style or cell).font.size if cell.value is not None else None
|
||||||
elif style_name=="fill_type":
|
elif style_name == "fill_type":
|
||||||
try:
|
try:
|
||||||
return (diff_style or cell).fill.tagname
|
return (diff_style or cell).fill.tagname
|
||||||
except:
|
except:
|
||||||
return None
|
return None
|
||||||
elif style_name=="bgcolor":
|
elif style_name == "bgcolor":
|
||||||
try:
|
try:
|
||||||
return (diff_style or cell).fill.bgColor.rgb
|
return (diff_style or cell).fill.bgColor.rgb
|
||||||
except:
|
except:
|
||||||
return None
|
return None
|
||||||
elif style_name=="fgcolor":
|
elif style_name == "fgcolor":
|
||||||
try:
|
try:
|
||||||
return (diff_style or cell).fill.fgColor.rgb
|
return (diff_style or cell).fill.fgColor.rgb
|
||||||
except:
|
except:
|
||||||
return None
|
return None
|
||||||
elif style_name=="hyperlink":
|
elif style_name == "hyperlink":
|
||||||
return cell.hyperlink or "" if cell.value is not None else None
|
return cell.hyperlink or "" if cell.value is not None else None
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
|
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
|
||||||
|
|
||||||
_absolute_range_pattern: Pattern[str] = re.compile( r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
|
|
||||||
|
_absolute_range_pattern: Pattern[str] = re.compile(r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
|
||||||
(?::
|
(?::
|
||||||
\$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2
|
\$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2
|
||||||
)?
|
)?
|
||||||
"""
|
"""
|
||||||
, re.X
|
, re.X
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]:
|
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]:
|
||||||
# function load_xlsx_styles {{{ #
|
# function load_xlsx_styles {{{ #
|
||||||
"""
|
"""
|
||||||
@@ -417,24 +430,24 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
|||||||
if m[2] is None and m[3] is None:
|
if m[2] is None and m[3] is None:
|
||||||
arguments.append(read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1])))
|
arguments.append(read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1])))
|
||||||
else:
|
else:
|
||||||
arguments.append( [ read_cell_value( book_name, sheet_name
|
arguments.append([read_cell_value(book_name, sheet_name
|
||||||
, coordinate="{:}{:}".format( get_column_letter(c[1])
|
, coordinate="{:}{:}".format(get_column_letter(c[1])
|
||||||
, c[0]
|
, c[0]
|
||||||
)
|
)
|
||||||
)\
|
) \
|
||||||
for c in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells\
|
for c in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells \
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
logger.debug("Absolute range arguments: %s", repr(arguments))
|
logger.debug("Absolute range arguments: %s", repr(arguments))
|
||||||
|
|
||||||
for rge in fmt.cells:
|
for rge in fmt.cells:
|
||||||
for c in rge.cells:
|
for c in rge.cells:
|
||||||
cell: Cell = worksheet.cell(row=c[0], column=c[1])
|
cell: Cell = worksheet.cell(row=c[0], column=c[1])
|
||||||
cell_value = read_cell_value( book_name, sheet_name
|
cell_value = read_cell_value(book_name, sheet_name
|
||||||
, coordinate="{:}{:d}".format( get_column_letter(c[1])
|
, coordinate="{:}{:d}".format(get_column_letter(c[1])
|
||||||
, c[0]
|
, c[0]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if condition(cell_value, *arguments):
|
if condition(cell_value, *arguments):
|
||||||
logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
|
logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
|
||||||
active_cells.append(cell)
|
active_cells.append(cell)
|
||||||
@@ -448,6 +461,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
|||||||
return style_dict
|
return style_dict
|
||||||
# }}} function load_xlsx_styles #
|
# }}} function load_xlsx_styles #
|
||||||
|
|
||||||
|
|
||||||
# Available Row Properties:
|
# Available Row Properties:
|
||||||
# hidden
|
# hidden
|
||||||
# collapsed
|
# collapsed
|
||||||
@@ -460,7 +474,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
|||||||
# collapsed
|
# collapsed
|
||||||
# min
|
# min
|
||||||
# max
|
# max
|
||||||
def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
|
def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options) \
|
||||||
-> Dict[Union[int, str], Dict[str, Any]]:
|
-> Dict[Union[int, str], Dict[str, Any]]:
|
||||||
# function load_rows_or_cols {{{ #
|
# function load_rows_or_cols {{{ #
|
||||||
"""
|
"""
|
||||||
@@ -491,6 +505,7 @@ def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
|
|||||||
return obj_set
|
return obj_set
|
||||||
# }}} function load_rows_or_cols #
|
# }}} function load_rows_or_cols #
|
||||||
|
|
||||||
|
|
||||||
def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
|
def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
|
||||||
# function load_filters {{{ #
|
# function load_filters {{{ #
|
||||||
try:
|
try:
|
||||||
@@ -514,16 +529,16 @@ def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, A
|
|||||||
filter_column["filters"] = set(flt_clm.filters.filter)
|
filter_column["filters"] = set(flt_clm.filters.filter)
|
||||||
if flt_clm.customFilters is not None:
|
if flt_clm.customFilters is not None:
|
||||||
filter_column["custom_filters_op"] = flt_clm.customFilters._and
|
filter_column["custom_filters_op"] = flt_clm.customFilters._and
|
||||||
filter_column["custom_filters"] = set( ( flt.operator
|
filter_column["custom_filters"] = set((flt.operator
|
||||||
, flt.val
|
, flt.val
|
||||||
)\
|
) \
|
||||||
for flt in flt_clm.customFilters.customFilter
|
for flt in flt_clm.customFilters.customFilter
|
||||||
)
|
)
|
||||||
filter_column_set.append(filter_column)
|
filter_column_set.append(filter_column)
|
||||||
filter_column_set = list( sorted( filter_column_set
|
filter_column_set = list(sorted(filter_column_set
|
||||||
, key=(lambda d: d["col_id"])
|
, key=(lambda d: d["col_id"])
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
filter_dict["filter_column"] = filter_column_set
|
filter_dict["filter_column"] = filter_column_set
|
||||||
|
|
||||||
# sortState
|
# sortState
|
||||||
@@ -534,26 +549,30 @@ def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, A
|
|||||||
sort_state_dict["case"] = sort_state.caseSensitive
|
sort_state_dict["case"] = sort_state.caseSensitive
|
||||||
sort_state_dict["method"] = sort_state.sortMethod
|
sort_state_dict["method"] = sort_state.sortMethod
|
||||||
sort_state_dict["ref"] = sort_state.ref
|
sort_state_dict["ref"] = sort_state.ref
|
||||||
sort_state_dict["condition"] = list( { "descending": cdt.descending
|
sort_state_dict["condition"] = list({"descending": cdt.descending
|
||||||
, "key": cdt.sortBy
|
, "key": cdt.sortBy
|
||||||
, "ref": cdt.ref
|
, "ref": cdt.ref
|
||||||
, "custom_list": cdt.customList
|
, "custom_list": cdt.customList
|
||||||
, "dxf_id": cdt.dxfId
|
, "dxf_id": cdt.dxfId
|
||||||
, "icon": cdt.iconSet
|
, "icon": cdt.iconSet
|
||||||
, "iconid": cdt.iconId
|
, "iconid": cdt.iconId
|
||||||
}\
|
} \
|
||||||
for cdt in sort_state.sortCondition
|
for cdt in sort_state.sortCondition
|
||||||
)
|
)
|
||||||
filter_dict["sort_state"] = sort_state_dict
|
filter_dict["sort_state"] = sort_state_dict
|
||||||
|
|
||||||
return filter_dict
|
return filter_dict
|
||||||
# }}} function load_filters #
|
# }}} function load_filters #
|
||||||
|
|
||||||
|
|
||||||
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
|
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
|
||||||
return all(k in item and item[k] == val for k, val in pattern.items())
|
return all(k in item and item[k] == val for k, val in pattern.items())
|
||||||
|
|
||||||
|
|
||||||
def _multicellrange_containsby(subset_candidate: MultiCellRange, superset_candidate: MultiCellRange) -> bool:
|
def _multicellrange_containsby(subset_candidate: MultiCellRange, superset_candidate: MultiCellRange) -> bool:
|
||||||
return all(r in superset_candidate for r in subset_candidate)
|
return all(r in superset_candidate for r in subset_candidate)
|
||||||
|
|
||||||
|
|
||||||
def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
|
def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
@@ -576,10 +595,10 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
|
|||||||
|
|
||||||
match_: Optional[Match[str]] = re.search(rule["ref"], value, flag)
|
match_: Optional[Match[str]] = re.search(rule["ref"], value, flag)
|
||||||
return match_ is not None
|
return match_ is not None
|
||||||
if rule["method"] in { "eq", "ne"
|
if rule["method"] in {"eq", "ne"
|
||||||
, "le", "lt"
|
, "le", "lt"
|
||||||
, "ge", "gt"
|
, "ge", "gt"
|
||||||
}:
|
}:
|
||||||
return getattr(operator, rule["method"])(value, rule["ref"])
|
return getattr(operator, rule["method"])(value, rule["ref"])
|
||||||
if rule["method"].startswith("approx"):
|
if rule["method"].startswith("approx"):
|
||||||
threshold: float = float(rule["method"].split(":")[1])
|
threshold: float = float(rule["method"].split(":")[1])
|
||||||
@@ -589,26 +608,27 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
|
|||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
return abs(value-rule["ref"])<=threshold
|
return abs(value - rule["ref"]) <= threshold
|
||||||
if rule["method"] == "spreadsheet_range":
|
if rule["method"] == "spreadsheet_range":
|
||||||
subset_limit = MultiCellRange(rule["ref"][0])
|
subset_limit = MultiCellRange(rule["ref"][0])
|
||||||
superset_limit = MultiCellRange(rule["ref"][1])
|
superset_limit = MultiCellRange(rule["ref"][1])
|
||||||
return _multicellrange_containsby(subset_limit, value)\
|
return _multicellrange_containsby(subset_limit, value) \
|
||||||
and _multicellrange_containsby(value, superset_limit)
|
and _multicellrange_containsby(value, superset_limit)
|
||||||
if rule["method"].startswith("range."): # e.g., range.te [0, 2] -> 0 < x <= 2
|
if rule["method"].startswith("range."): # e.g., range.te [0, 2] -> 0 < x <= 2
|
||||||
left_et = rule["method"][6]
|
left_et = rule["method"][6]
|
||||||
right_et = rule["method"][7]
|
right_et = rule["method"][7]
|
||||||
return getattr(operator, "l" + left_et)(rule["ref"][0], value)\
|
return getattr(operator, "l" + left_et)(rule["ref"][0], value) \
|
||||||
and getattr(operator, "l" + right_et)(value, rule["ref"][1])
|
and getattr(operator, "l" + right_et)(value, rule["ref"][1])
|
||||||
if rule["method"] in {"str_list_eq", "str_set_eq"}:
|
if rule["method"] in {"str_list_eq", "str_set_eq"}:
|
||||||
container_type_str: str = rule["method"][4:-3]
|
container_type_str: str = rule["method"][4:-3]
|
||||||
container_type = getattr(builtins, container_type_str)
|
container_type = getattr(builtins, container_type_str)
|
||||||
|
|
||||||
value: container_type = container_type(value.strip("\"'").split(","))
|
value: container_type = container_type(value.strip("\"'").split(","))
|
||||||
ref: container_type = container_type(rule["ref"])
|
ref: container_type = container_type(rule["ref"])
|
||||||
return value==ref
|
return value == ref
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
def are_lists_equal(list1, list2, comparison_func):
|
def are_lists_equal(list1, list2, comparison_func):
|
||||||
# First check if both lists have the same length
|
# First check if both lists have the same length
|
||||||
if len(list1) != len(list2):
|
if len(list1) != len(list2):
|
||||||
@@ -652,114 +672,3 @@ def compare_urls(url1, url2):
|
|||||||
|
|
||||||
# Compare the normalized URLs
|
# Compare the normalized URLs
|
||||||
return norm_url1 == norm_url2
|
return norm_url1 == norm_url2
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
path1 = "test.xlsx"
|
|
||||||
#path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
|
|
||||||
path1 = "../../任务集/SheetCopilot/dataset/task_sheet_answers_v2/BoomerangSales/2_BoomerangSales/2_BoomerangSales_gt1.xlsx"
|
|
||||||
workbook1: Workbook = openpyxl.load_workbook(filename=path1)
|
|
||||||
worksheet1: Worksheet = workbook1.active
|
|
||||||
#charts: List[ChartBase] = worksheet1._charts
|
|
||||||
# print(len(charts))
|
|
||||||
# print(type(charts[0]))
|
|
||||||
#
|
|
||||||
# print(len(charts[0].series))
|
|
||||||
# print(type(charts[0].series[0]))
|
|
||||||
# print(type(charts[0].series[0].val))
|
|
||||||
##print(charts[0].series[0].val)
|
|
||||||
# print(charts[0].series[0].val.numRef.f)
|
|
||||||
#
|
|
||||||
# print(type(charts[0].series[0].cat))
|
|
||||||
##print(charts[0].series[0].cat)
|
|
||||||
# print(charts[0].series[0].cat.numRef)
|
|
||||||
# print(charts[0].series[0].cat.strRef)
|
|
||||||
# print(charts[0].series[0].cat.strRef.f)
|
|
||||||
|
|
||||||
# print(type(charts[0].title.tx.strRef))
|
|
||||||
# print(type(charts[0].title.tx.rich))
|
|
||||||
# print(type(charts[0].title.txPr))
|
|
||||||
# print(len(charts[0].title.tx.rich.p))
|
|
||||||
# print(len(charts[0].title.tx.rich.p[0].r))
|
|
||||||
# print(type(charts[0].title.tx.rich.p[0].r[0]))
|
|
||||||
# print(type(charts[0].title.tx.rich.p[0].r[0].t))
|
|
||||||
# print(charts[0].title.tx.rich.p[0].r[0].t)
|
|
||||||
|
|
||||||
# print(type(charts[0].anchor))
|
|
||||||
# print(charts[0].anchor.editAs)
|
|
||||||
# print(charts[0].anchor._from.col, charts[0].anchor.to.row)
|
|
||||||
# print(charts[0].anchor.to.col, charts[0].anchor.to.row)
|
|
||||||
|
|
||||||
# df1 = pd.read_excel(path1)
|
|
||||||
# print(df1)
|
|
||||||
#print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
|
|
||||||
#print(type(worksheet1["A1"].hyperlink))
|
|
||||||
#print(worksheet1["A1"].hyperlink)
|
|
||||||
#print(worksheet1._charts[0].legend)
|
|
||||||
#print(worksheet1._charts[0].legend.position)
|
|
||||||
#for entr in worksheet1._charts[0].legend.legendEntry:
|
|
||||||
#print("Entr", entr.txPr.p[0].r[0].t)
|
|
||||||
#print(load_filters(workbook1, "工作表1"))
|
|
||||||
#print(worksheet1.auto_filter)
|
|
||||||
#for pvt in worksheet1._pivots:
|
|
||||||
##print(type(pvt))
|
|
||||||
##print(pvt)
|
|
||||||
#print(type(pvt.cache))
|
|
||||||
##print(pvt.cache)
|
|
||||||
#print(pvt.cache.cacheSource.type)
|
|
||||||
#print(pvt.cache.cacheSource.worksheetSource.ref)
|
|
||||||
#print(pvt.cache.cacheSource.worksheetSource.sheet)
|
|
||||||
#
|
|
||||||
#print(type(pvt.location))
|
|
||||||
#print(pvt.location)
|
|
||||||
#for f in pvt.pivotFields:
|
|
||||||
#print(type(f))
|
|
||||||
#print([(itm.h, itm.x) for itm in f.items])
|
|
||||||
##for f_itm in f.items:
|
|
||||||
##print(f_itm.n)
|
|
||||||
##print(f_itm.t)
|
|
||||||
##print(f_itm.h)
|
|
||||||
##print(f_itm.s)
|
|
||||||
##print(f_itm.sd)
|
|
||||||
##print(f_itm.f)
|
|
||||||
##print(f_itm.m)
|
|
||||||
##print(f_itm.c)
|
|
||||||
##print(f_itm.x)
|
|
||||||
##print(f_itm.d)
|
|
||||||
##print(f_itm.e)
|
|
||||||
##print(f.countASubtotal)
|
|
||||||
##print(f.countSubtotal)
|
|
||||||
##for f in pvt.dataFields:
|
|
||||||
##print(f.name)
|
|
||||||
##print(f.fld)
|
|
||||||
###print(f.baseField)
|
|
||||||
##print(f.subtotal)
|
|
||||||
##print(f.showDataAs)
|
|
||||||
##for f in pvt.rowFields:
|
|
||||||
##print(1, f.x)
|
|
||||||
##for f in pvt.rowItems:
|
|
||||||
##print(2, f.t, f.r, f.i, f.x)
|
|
||||||
##for f in pvt.colFields:
|
|
||||||
##print(3, f.x)
|
|
||||||
##for f in pvt.colItems:
|
|
||||||
##print(4, f.t, f.r, f.i, f.x)
|
|
||||||
#for f in pvt.pageFields:
|
|
||||||
#print(5, f.fld)
|
|
||||||
#for flt in pvt.filters:
|
|
||||||
#print(5, flt.fld)
|
|
||||||
#print(6, flt.mpFld)
|
|
||||||
#print(7, flt.type)
|
|
||||||
#print(8, flt.evalOrder)
|
|
||||||
#print(9, flt.id)
|
|
||||||
#print(10, flt.stringValue1)
|
|
||||||
#print(11, flt.stringValue2)
|
|
||||||
#print(load_charts(workbook1, "Sheet2", chart_props=["title", "type", "legend"]))
|
|
||||||
#print(load_filters(workbook1, "透视表_工作表1_1"))
|
|
||||||
#workbook1.save("test2.xlsx")
|
|
||||||
print( load_pivot_tables( workbook1, "Sheet2", pivot_props=[ "col_fields"
|
|
||||||
, "filter"
|
|
||||||
, "row_fields"
|
|
||||||
, "data_fields"
|
|
||||||
]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user