Code clean

This commit is contained in:
Timothyxxx
2024-02-23 15:40:26 +08:00
parent 1610358e08
commit 7427b39d1d
2 changed files with 121 additions and 233 deletions

View File

@@ -284,18 +284,6 @@ def compare_contains_image(docx_file1, docx_file2):
return 1 return 1
# file1 = 'path/to/file1.docx'
# file2 = 'path/to/file2.docx'
# print(are_docx_files_same(file1, file2))
# Replace 'your_document.docx' with the path to your document
# result = contains_page_break('your_document.docx')
# print(result)
# config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
# print(find_default_font("Ani", config_path))
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs): def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
if not compare_docx_files(file_path1, file_path2): if not compare_docx_files(file_path1, file_path2):
return 0 return 0
@@ -538,12 +526,3 @@ def compare_highlighted_text(file1, file2):
return 1 return 1
else: else:
return 0 return 0
if __name__ == '__main__':
print(
compare_docx_files(
r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\88fe4b2d-3040-4c70-9a70-546a47764b48\CCCH9003_Tutorial_guidelines.docx",
r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\88fe4b2d-3040-4c70-9a70-546a47764b48\CCCH9003_Tutorial_guidelines_Gold.docx",
ignore_blanks=False
)
)

View File

@@ -1,46 +1,47 @@
import builtins
import functools
import itertools
import logging import logging
import operator
import re
import zipfile import zipfile
from typing import Any, TypeVar, Union, Iterable, Optional, Callable from typing import Any, TypeVar, Union, Iterable, Optional, Callable
from typing import Dict, List, Set, Match, Tuple, Pattern from typing import Dict, List, Set, Match, Tuple, Pattern
from urllib.parse import urlparse, urlunparse from urllib.parse import urlparse, urlunparse
import re
import functools
import operator
import builtins
import itertools
import formulas
import lxml.cssselect import lxml.cssselect
import lxml.etree import lxml.etree
import openpyxl
import xmltodict import xmltodict
from lxml.etree import _Element from lxml.etree import _Element
from openpyxl import Workbook from openpyxl import Workbook
from openpyxl.cell.cell import Cell
from openpyxl.chart._chart import ChartBase from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet from openpyxl.formatting.formatting import ConditionalFormattingList
from openpyxl.worksheet.filters import AutoFilter, SortState from openpyxl.pivot.cache import CacheSource as PivotCacheSource
from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.utils import coordinate_to_tuple, get_column_letter
from openpyxl.worksheet.cell_range import MultiCellRange, CellRange from openpyxl.worksheet.cell_range import MultiCellRange, CellRange
from openpyxl.worksheet.dimensions import DimensionHolder from openpyxl.worksheet.dimensions import DimensionHolder
from openpyxl.formatting.formatting import ConditionalFormattingList from openpyxl.worksheet.filters import AutoFilter, SortState
from openpyxl.utils import coordinate_to_tuple, get_column_letter from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
from openpyxl.pivot.cache import CacheSource as PivotCacheSource
import formulas
V = TypeVar("Value") V = TypeVar("Value")
logger = logging.getLogger("desktopenv.metrics.utils") logger = logging.getLogger("desktopenv.metrics.utils")
_xlsx_namespaces = [ ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main") _xlsx_namespaces = [("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
, ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main") , ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main") , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
] ]
_xlsx_ns_mapping = dict(_xlsx_namespaces) _xlsx_ns_mapping = dict(_xlsx_namespaces)
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces)) _xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None _xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping) _sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping) _sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]: def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
# function load_sparklines {{{ # # function load_sparklines {{{ #
""" """
@@ -174,6 +175,7 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
return chart_set return chart_set
# }}} function load_charts # # }}} function load_charts #
# Available Pivot Properties: # Available Pivot Properties:
# name: str # name: str
# show_total, show_empty_row, show_empty_col, show_headers: bool # show_total, show_empty_row, show_empty_col, show_headers: bool
@@ -210,23 +212,26 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
pivot_set: Dict[str, Any] = {} pivot_set: Dict[str, Any] = {}
pivot_props: Set[str] = set(options.get("pivot_props", [])) pivot_props: Set[str] = set(options.get("pivot_props", []))
for pvt in pivots: for pvt in pivots:
raw_selection: List[List[tuple[Optional[bool], int]]] =\ raw_selection: List[List[tuple[Optional[bool], int]]] = \
[ [(itm.h, itm.x) for itm in f.items if itm.x is not None]\ [[(itm.h, itm.x) for itm in f.items if itm.x is not None] \
for f in pvt.pivotFields for f in pvt.pivotFields
] ]
raw__selection: List[List[tuple[Optional[bool], int]]] = list(itertools.dropwhile(lambda r: len(r)==0, raw_selection)) raw__selection: List[List[tuple[Optional[bool], int]]] = list(
left_bias = len(raw_selection)-len(raw__selection) itertools.dropwhile(lambda r: len(r) == 0, raw_selection))
selection: List[List[tuple[Optional[bool], int]]] = list((itertools.dropwhile(lambda r: len(r)==0, reversed(raw__selection))))[::-1] left_bias = len(raw_selection) - len(raw__selection)
right_bias = len(raw__selection)-len(selection) selection: List[List[tuple[Optional[bool], int]]] = list(
(itertools.dropwhile(lambda r: len(r) == 0, reversed(raw__selection))))[::-1]
right_bias = len(raw__selection) - len(selection)
cache_source: PivotCacheSource = pvt.cache.cacheSource cache_source: PivotCacheSource = pvt.cache.cacheSource
cell_range1: str cell_range1: str
cell_range2: str cell_range2: str
cell_range1, cell_range2 = cache_source.worksheetSource.ref.split(":") cell_range1, cell_range2 = cache_source.worksheetSource.ref.split(":")
cell_range1: Tuple[int, int] = coordinate_to_tuple(cell_range1) cell_range1: Tuple[int, int] = coordinate_to_tuple(cell_range1)
cell_range1 = (cell_range1[0], cell_range1[1]+left_bias) cell_range1 = (cell_range1[0], cell_range1[1] + left_bias)
cell_range2: Tuple[int, int] = coordinate_to_tuple(cell_range2) cell_range2: Tuple[int, int] = coordinate_to_tuple(cell_range2)
cell_range2 = (cell_range2[0], cell_range2[1]-right_bias) cell_range2 = (cell_range2[0], cell_range2[1] - right_bias)
source: str = "{:};{:}:{:};{:}".format(cache_source.type, cell_range1, cell_range2, cache_source.worksheetSource.sheet) source: str = "{:};{:}:{:};{:}".format(cache_source.type, cell_range1, cell_range2,
cache_source.worksheetSource.sheet)
info: Dict[str, Any] = {} info: Dict[str, Any] = {}
if "name" in pivot_props: if "name" in pivot_props:
@@ -248,22 +253,26 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
if "filter" in pivot_props: if "filter" in pivot_props:
info["filter_fields"] = set(f.fld for f in pvt.pageFields) info["filter_fields"] = set(f.fld for f in pvt.pageFields)
if "col_fields" in pivot_props: if "col_fields" in pivot_props:
info["col_fields"] = [f.x-left_bias for f in pvt.colFields] info["col_fields"] = [f.x - left_bias for f in pvt.colFields]
if "row_fields" in pivot_props: if "row_fields" in pivot_props:
info["row_fields"] = [f.x-left_bias for f in pvt.rowFields] info["row_fields"] = [f.x - left_bias for f in pvt.rowFields]
if "data_fields" in pivot_props: if "data_fields" in pivot_props:
info["data_fields"] = [ "{:d};{:};{:};{:}".format( f.fld-left_bias, f.name if "data_fields_name" in pivot_props else "" info["data_fields"] = [
, f.subtotal, f.showDataAs "{:d};{:};{:};{:}".format(f.fld - left_bias, f.name if "data_fields_name" in pivot_props else ""
)\ , f.subtotal, f.showDataAs
for f in pvt.dataFields ) \
] for f in pvt.dataFields
]
pivot_set[source] = info pivot_set[source] = info
logger.debug(".[%s].pivots: %s", sheet_name, repr(pivot_set)) logger.debug(".[%s].pivots: %s", sheet_name, repr(pivot_set))
return pivot_set return pivot_set
# }}} function load_pivot_tables # # }}} function load_pivot_tables #
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping) _shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# read_cell_value {{{ # # read_cell_value {{{ #
try: try:
@@ -283,20 +292,20 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f: with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
sheet: _Element = lxml.etree.fromstring(f.read()) sheet: _Element = lxml.etree.fromstring(f.read())
cells: List[_Element] =\ cells: List[_Element] = \
lxml.cssselect.CSSSelector( 'oo|row>oo|c[r="{:}"]'.format(coordinate) lxml.cssselect.CSSSelector('oo|row>oo|c[r="{:}"]'.format(coordinate)
, namespaces=_xlsx_ns_mapping , namespaces=_xlsx_ns_mapping
)(sheet) )(sheet)
if len(cells)==0: if len(cells) == 0:
return None return None
cell: _Element = cells[0] cell: _Element = cells[0]
except zipfile.BadZipFile: except zipfile.BadZipFile:
return None return None
cell: Dict[str, str] = xmltodict.parse( lxml.etree.tostring(cell, encoding="unicode") cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")
, process_namespaces=True , process_namespaces=True
, namespaces=_xlsx_ns_imapping , namespaces=_xlsx_ns_imapping
) )
logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell)) logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
if "@t" not in cell["c"]: if "@t" not in cell["c"]:
return None return None
@@ -308,6 +317,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
return cell["c"]["v"] return cell["c"]["v"]
# }}} read_cell_value # # }}} read_cell_value #
# Supported Styles: # Supported Styles:
# number_format # number_format
# font_name - str # font_name - str
@@ -322,50 +332,53 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# fgcolor - in aRGB, e.g., FF00FFFF is yellow # fgcolor - in aRGB, e.g., FF00FFFF is yellow
# hyperlink - str # hyperlink - str
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any: def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
if style_name=="number_format": if style_name == "number_format":
return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode)\ return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode) \
if cell.value is not None and cell.data_type=="n" else None if cell.value is not None and cell.data_type == "n" else None
elif style_name=="font_name": elif style_name == "font_name":
return (diff_style or cell).font.name if cell.value is not None else None return (diff_style or cell).font.name if cell.value is not None else None
elif style_name=="font_family": elif style_name == "font_family":
return (diff_style or cell).font.family if cell.value is not None else None return (diff_style or cell).font.family if cell.value is not None else None
elif style_name=="font_color": elif style_name == "font_color":
return (diff_style or cell).font.color.rgb if cell.value is not None else None return (diff_style or cell).font.color.rgb if cell.value is not None else None
elif style_name=="font_bold": elif style_name == "font_bold":
return (diff_style or cell).font.bold if cell.value is not None else None return (diff_style or cell).font.bold if cell.value is not None else None
elif style_name=="font_italic": elif style_name == "font_italic":
return (diff_style or cell).font.italic if cell.value is not None else None return (diff_style or cell).font.italic if cell.value is not None else None
elif style_name=="font_underline": elif style_name == "font_underline":
return (diff_style or cell).font.underline if cell.value is not None else None return (diff_style or cell).font.underline if cell.value is not None else None
elif style_name=="font_size": elif style_name == "font_size":
return (diff_style or cell).font.size if cell.value is not None else None return (diff_style or cell).font.size if cell.value is not None else None
elif style_name=="fill_type": elif style_name == "fill_type":
try: try:
return (diff_style or cell).fill.tagname return (diff_style or cell).fill.tagname
except: except:
return None return None
elif style_name=="bgcolor": elif style_name == "bgcolor":
try: try:
return (diff_style or cell).fill.bgColor.rgb return (diff_style or cell).fill.bgColor.rgb
except: except:
return None return None
elif style_name=="fgcolor": elif style_name == "fgcolor":
try: try:
return (diff_style or cell).fill.fgColor.rgb return (diff_style or cell).fill.fgColor.rgb
except: except:
return None return None
elif style_name=="hyperlink": elif style_name == "hyperlink":
return cell.hyperlink or "" if cell.value is not None else None return cell.hyperlink or "" if cell.value is not None else None
else: else:
raise NotImplementedError("Unsupported Style: {:}".format(style_name)) raise NotImplementedError("Unsupported Style: {:}".format(style_name))
_absolute_range_pattern: Pattern[str] = re.compile( r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
_absolute_range_pattern: Pattern[str] = re.compile(r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
(?:: (?::
\$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2 \$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2
)? )?
""" """
, re.X , re.X
) )
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]: def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]:
# function load_xlsx_styles {{{ # # function load_xlsx_styles {{{ #
""" """
@@ -417,24 +430,24 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
if m[2] is None and m[3] is None: if m[2] is None and m[3] is None:
arguments.append(read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1]))) arguments.append(read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1])))
else: else:
arguments.append( [ read_cell_value( book_name, sheet_name arguments.append([read_cell_value(book_name, sheet_name
, coordinate="{:}{:}".format( get_column_letter(c[1]) , coordinate="{:}{:}".format(get_column_letter(c[1])
, c[0] , c[0]
) )
)\ ) \
for c in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells\ for c in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells \
] ]
) )
logger.debug("Absolute range arguments: %s", repr(arguments)) logger.debug("Absolute range arguments: %s", repr(arguments))
for rge in fmt.cells: for rge in fmt.cells:
for c in rge.cells: for c in rge.cells:
cell: Cell = worksheet.cell(row=c[0], column=c[1]) cell: Cell = worksheet.cell(row=c[0], column=c[1])
cell_value = read_cell_value( book_name, sheet_name cell_value = read_cell_value(book_name, sheet_name
, coordinate="{:}{:d}".format( get_column_letter(c[1]) , coordinate="{:}{:d}".format(get_column_letter(c[1])
, c[0] , c[0]
) )
) )
if condition(cell_value, *arguments): if condition(cell_value, *arguments):
logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0]) logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
active_cells.append(cell) active_cells.append(cell)
@@ -448,6 +461,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
return style_dict return style_dict
# }}} function load_xlsx_styles # # }}} function load_xlsx_styles #
# Available Row Properties: # Available Row Properties:
# hidden # hidden
# collapsed # collapsed
@@ -460,7 +474,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
# collapsed # collapsed
# min # min
# max # max
def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\ def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options) \
-> Dict[Union[int, str], Dict[str, Any]]: -> Dict[Union[int, str], Dict[str, Any]]:
# function load_rows_or_cols {{{ # # function load_rows_or_cols {{{ #
""" """
@@ -491,6 +505,7 @@ def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
return obj_set return obj_set
# }}} function load_rows_or_cols # # }}} function load_rows_or_cols #
def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]: def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
# function load_filters {{{ # # function load_filters {{{ #
try: try:
@@ -514,16 +529,16 @@ def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, A
filter_column["filters"] = set(flt_clm.filters.filter) filter_column["filters"] = set(flt_clm.filters.filter)
if flt_clm.customFilters is not None: if flt_clm.customFilters is not None:
filter_column["custom_filters_op"] = flt_clm.customFilters._and filter_column["custom_filters_op"] = flt_clm.customFilters._and
filter_column["custom_filters"] = set( ( flt.operator filter_column["custom_filters"] = set((flt.operator
, flt.val , flt.val
)\ ) \
for flt in flt_clm.customFilters.customFilter for flt in flt_clm.customFilters.customFilter
) )
filter_column_set.append(filter_column) filter_column_set.append(filter_column)
filter_column_set = list( sorted( filter_column_set filter_column_set = list(sorted(filter_column_set
, key=(lambda d: d["col_id"]) , key=(lambda d: d["col_id"])
) )
) )
filter_dict["filter_column"] = filter_column_set filter_dict["filter_column"] = filter_column_set
# sortState # sortState
@@ -534,26 +549,30 @@ def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, A
sort_state_dict["case"] = sort_state.caseSensitive sort_state_dict["case"] = sort_state.caseSensitive
sort_state_dict["method"] = sort_state.sortMethod sort_state_dict["method"] = sort_state.sortMethod
sort_state_dict["ref"] = sort_state.ref sort_state_dict["ref"] = sort_state.ref
sort_state_dict["condition"] = list( { "descending": cdt.descending sort_state_dict["condition"] = list({"descending": cdt.descending
, "key": cdt.sortBy , "key": cdt.sortBy
, "ref": cdt.ref , "ref": cdt.ref
, "custom_list": cdt.customList , "custom_list": cdt.customList
, "dxf_id": cdt.dxfId , "dxf_id": cdt.dxfId
, "icon": cdt.iconSet , "icon": cdt.iconSet
, "iconid": cdt.iconId , "iconid": cdt.iconId
}\ } \
for cdt in sort_state.sortCondition for cdt in sort_state.sortCondition
) )
filter_dict["sort_state"] = sort_state_dict filter_dict["sort_state"] = sort_state_dict
return filter_dict return filter_dict
# }}} function load_filters # # }}} function load_filters #
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool: def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
return all(k in item and item[k] == val for k, val in pattern.items()) return all(k in item and item[k] == val for k, val in pattern.items())
def _multicellrange_containsby(subset_candidate: MultiCellRange, superset_candidate: MultiCellRange) -> bool: def _multicellrange_containsby(subset_candidate: MultiCellRange, superset_candidate: MultiCellRange) -> bool:
return all(r in superset_candidate for r in subset_candidate) return all(r in superset_candidate for r in subset_candidate)
def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool: def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
""" """
Args: Args:
@@ -576,10 +595,10 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
match_: Optional[Match[str]] = re.search(rule["ref"], value, flag) match_: Optional[Match[str]] = re.search(rule["ref"], value, flag)
return match_ is not None return match_ is not None
if rule["method"] in { "eq", "ne" if rule["method"] in {"eq", "ne"
, "le", "lt" , "le", "lt"
, "ge", "gt" , "ge", "gt"
}: }:
return getattr(operator, rule["method"])(value, rule["ref"]) return getattr(operator, rule["method"])(value, rule["ref"])
if rule["method"].startswith("approx"): if rule["method"].startswith("approx"):
threshold: float = float(rule["method"].split(":")[1]) threshold: float = float(rule["method"].split(":")[1])
@@ -589,26 +608,27 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
except (ValueError, TypeError): except (ValueError, TypeError):
return False return False
else: else:
return abs(value-rule["ref"])<=threshold return abs(value - rule["ref"]) <= threshold
if rule["method"] == "spreadsheet_range": if rule["method"] == "spreadsheet_range":
subset_limit = MultiCellRange(rule["ref"][0]) subset_limit = MultiCellRange(rule["ref"][0])
superset_limit = MultiCellRange(rule["ref"][1]) superset_limit = MultiCellRange(rule["ref"][1])
return _multicellrange_containsby(subset_limit, value)\ return _multicellrange_containsby(subset_limit, value) \
and _multicellrange_containsby(value, superset_limit) and _multicellrange_containsby(value, superset_limit)
if rule["method"].startswith("range."): # e.g., range.te [0, 2] -> 0 < x <= 2 if rule["method"].startswith("range."): # e.g., range.te [0, 2] -> 0 < x <= 2
left_et = rule["method"][6] left_et = rule["method"][6]
right_et = rule["method"][7] right_et = rule["method"][7]
return getattr(operator, "l" + left_et)(rule["ref"][0], value)\ return getattr(operator, "l" + left_et)(rule["ref"][0], value) \
and getattr(operator, "l" + right_et)(value, rule["ref"][1]) and getattr(operator, "l" + right_et)(value, rule["ref"][1])
if rule["method"] in {"str_list_eq", "str_set_eq"}: if rule["method"] in {"str_list_eq", "str_set_eq"}:
container_type_str: str = rule["method"][4:-3] container_type_str: str = rule["method"][4:-3]
container_type = getattr(builtins, container_type_str) container_type = getattr(builtins, container_type_str)
value: container_type = container_type(value.strip("\"'").split(",")) value: container_type = container_type(value.strip("\"'").split(","))
ref: container_type = container_type(rule["ref"]) ref: container_type = container_type(rule["ref"])
return value==ref return value == ref
raise NotImplementedError() raise NotImplementedError()
def are_lists_equal(list1, list2, comparison_func): def are_lists_equal(list1, list2, comparison_func):
# First check if both lists have the same length # First check if both lists have the same length
if len(list1) != len(list2): if len(list1) != len(list2):
@@ -652,114 +672,3 @@ def compare_urls(url1, url2):
# Compare the normalized URLs # Compare the normalized URLs
return norm_url1 == norm_url2 return norm_url1 == norm_url2
if __name__ == "__main__":
path1 = "test.xlsx"
#path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
path1 = "../../任务集/SheetCopilot/dataset/task_sheet_answers_v2/BoomerangSales/2_BoomerangSales/2_BoomerangSales_gt1.xlsx"
workbook1: Workbook = openpyxl.load_workbook(filename=path1)
worksheet1: Worksheet = workbook1.active
#charts: List[ChartBase] = worksheet1._charts
# print(len(charts))
# print(type(charts[0]))
#
# print(len(charts[0].series))
# print(type(charts[0].series[0]))
# print(type(charts[0].series[0].val))
##print(charts[0].series[0].val)
# print(charts[0].series[0].val.numRef.f)
#
# print(type(charts[0].series[0].cat))
##print(charts[0].series[0].cat)
# print(charts[0].series[0].cat.numRef)
# print(charts[0].series[0].cat.strRef)
# print(charts[0].series[0].cat.strRef.f)
# print(type(charts[0].title.tx.strRef))
# print(type(charts[0].title.tx.rich))
# print(type(charts[0].title.txPr))
# print(len(charts[0].title.tx.rich.p))
# print(len(charts[0].title.tx.rich.p[0].r))
# print(type(charts[0].title.tx.rich.p[0].r[0]))
# print(type(charts[0].title.tx.rich.p[0].r[0].t))
# print(charts[0].title.tx.rich.p[0].r[0].t)
# print(type(charts[0].anchor))
# print(charts[0].anchor.editAs)
# print(charts[0].anchor._from.col, charts[0].anchor.to.row)
# print(charts[0].anchor.to.col, charts[0].anchor.to.row)
# df1 = pd.read_excel(path1)
# print(df1)
#print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
#print(type(worksheet1["A1"].hyperlink))
#print(worksheet1["A1"].hyperlink)
#print(worksheet1._charts[0].legend)
#print(worksheet1._charts[0].legend.position)
#for entr in worksheet1._charts[0].legend.legendEntry:
#print("Entr", entr.txPr.p[0].r[0].t)
#print(load_filters(workbook1, "工作表1"))
#print(worksheet1.auto_filter)
#for pvt in worksheet1._pivots:
##print(type(pvt))
##print(pvt)
#print(type(pvt.cache))
##print(pvt.cache)
#print(pvt.cache.cacheSource.type)
#print(pvt.cache.cacheSource.worksheetSource.ref)
#print(pvt.cache.cacheSource.worksheetSource.sheet)
#
#print(type(pvt.location))
#print(pvt.location)
#for f in pvt.pivotFields:
#print(type(f))
#print([(itm.h, itm.x) for itm in f.items])
##for f_itm in f.items:
##print(f_itm.n)
##print(f_itm.t)
##print(f_itm.h)
##print(f_itm.s)
##print(f_itm.sd)
##print(f_itm.f)
##print(f_itm.m)
##print(f_itm.c)
##print(f_itm.x)
##print(f_itm.d)
##print(f_itm.e)
##print(f.countASubtotal)
##print(f.countSubtotal)
##for f in pvt.dataFields:
##print(f.name)
##print(f.fld)
###print(f.baseField)
##print(f.subtotal)
##print(f.showDataAs)
##for f in pvt.rowFields:
##print(1, f.x)
##for f in pvt.rowItems:
##print(2, f.t, f.r, f.i, f.x)
##for f in pvt.colFields:
##print(3, f.x)
##for f in pvt.colItems:
##print(4, f.t, f.r, f.i, f.x)
#for f in pvt.pageFields:
#print(5, f.fld)
#for flt in pvt.filters:
#print(5, flt.fld)
#print(6, flt.mpFld)
#print(7, flt.type)
#print(8, flt.evalOrder)
#print(9, flt.id)
#print(10, flt.stringValue1)
#print(11, flt.stringValue2)
#print(load_charts(workbook1, "Sheet2", chart_props=["title", "type", "legend"]))
#print(load_filters(workbook1, "透视表_工作表1_1"))
#workbook1.save("test2.xlsx")
print( load_pivot_tables( workbook1, "Sheet2", pivot_props=[ "col_fields"
, "filter"
, "row_fields"
, "data_fields"
]
)
)