Calc eval fix (#225)
* ver Jun17th updating annotations * ver Jun17th corrected annotation of 1d17 added check for cell merge * ver Jun17th updated several annotations * ver Jun20th fixed set-up config of 2bd59342-0664-4ccb-ba87-79379096cc08 * fix: Enhance instructions in LibreOffice Calc examples for clarity and specificity, including details on using Pivot Tables, column placements, and revenue calculations. * ver Jun21st updating calc evals * ver Jun22nd fixed an impress task * ver Jun22ndv2 adjusted several calc tasks * Clean scaffolds --------- Co-authored-by: BowenBryanWang <bryanwang.nlp@connect.hku.hk> Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
@@ -1,12 +1,12 @@
|
||||
import builtins
|
||||
import datetime
|
||||
#import datetime
|
||||
import functools
|
||||
import itertools
|
||||
import logging
|
||||
import operator
|
||||
import re
|
||||
import zipfile
|
||||
import pandas as pd
|
||||
#import pandas as pd
|
||||
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
|
||||
from typing import Dict, List, Set, Match, Tuple, Pattern
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
@@ -17,7 +17,7 @@ import lxml.etree
|
||||
import xmltodict
|
||||
from lxml.etree import _Element
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.cell.cell import Cell
|
||||
from openpyxl.cell.cell import Cell, MergedCell
|
||||
from openpyxl.chart._chart import ChartBase
|
||||
from openpyxl.formatting.formatting import ConditionalFormattingList
|
||||
from openpyxl.pivot.cache import CacheSource as PivotCacheSource
|
||||
@@ -292,7 +292,9 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
||||
for elm in str_elements
|
||||
]
|
||||
except:
|
||||
#logger.exception("Read shared strings error: %s", xlsx_file)
|
||||
logger.debug("Read shared strings error: %s", xlsx_file)
|
||||
shared_strs: List[str] = []
|
||||
|
||||
with z_f.open("xl/workbook.xml") as f:
|
||||
workbook_database: _Element = lxml.etree.fromstring(f.read())
|
||||
@@ -315,6 +317,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
||||
, process_namespaces=True
|
||||
, namespaces=_xlsx_ns_imapping
|
||||
)
|
||||
logger.debug("%s.shared_strings: %s", xlsx_file, repr(shared_strs))
|
||||
logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
|
||||
try:
|
||||
if "@t" not in cell["c"] or cell["c"]["@t"] == "n":
|
||||
@@ -323,6 +326,8 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
||||
return shared_strs[int(cell["c"]["v"])]
|
||||
if cell["c"]["@t"] == "str":
|
||||
return cell["c"]["v"]
|
||||
if cell["c"]["@t"] == "inlineStr":
|
||||
return cell["c"]["is"]["t"]
|
||||
except (KeyError, ValueError):
|
||||
return None
|
||||
# }}} read_cell_value #
|
||||
@@ -338,10 +343,11 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
||||
# font_underline - "single" | "double" | "singleAccounting" | "doubleAccounting"
|
||||
# font_size - float
|
||||
# fill_type - "patternFill" | "gradientFill"
|
||||
# bgcolor - in aRGB, e.g., FFFF0000 is red
|
||||
# fgcolor - in aRGB, e.g., FF00FFFF is yellow
|
||||
# bgcolor - in aRGB, e.g., FFFF0000 is red; This property seems to be ambiguous with fgcolor in xlsx, strange
|
||||
# fgcolor - in aRGB, e.g., FFFFFF00 is yellow # Deprecated
|
||||
# hyperlink - str
|
||||
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
|
||||
# merge - bool, True if the cell is in a merged range and is not the first cell in the merged range
|
||||
def _read_cell_style(style_name: str, cell: Union[Cell, MergedCell], diff_style: Optional[DifferentialStyle] = None) -> Any:
|
||||
if style_name == "number_format":
|
||||
return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode) \
|
||||
if cell.value is not None and cell.data_type == "n" else None
|
||||
@@ -364,18 +370,24 @@ def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[Different
|
||||
return (diff_style or cell).fill.tagname
|
||||
except:
|
||||
return None
|
||||
elif style_name == "bgcolor":
|
||||
elif style_name == "bgcolor" or style_name == "fgcolor":
|
||||
try:
|
||||
return (diff_style or cell).fill.bgColor.rgb
|
||||
except:
|
||||
return None
|
||||
elif style_name == "fgcolor":
|
||||
try:
|
||||
return (diff_style or cell).fill.fgColor.rgb
|
||||
#return (diff_style or cell).fill.bgColor.rgb
|
||||
if diff_style is not None:
|
||||
return diff_style.fill.bgColor.rgb
|
||||
else:
|
||||
return cell.fill.fgColor.rgb
|
||||
except:
|
||||
return None
|
||||
#elif style_name == "fgcolor":
|
||||
#try:
|
||||
#return (diff_style or cell).fill.fgColor.rgb
|
||||
#except:
|
||||
#return None
|
||||
elif style_name == "hyperlink":
|
||||
return cell.hyperlink or "" if cell.value is not None else None
|
||||
elif style_name == "merge":
|
||||
return isinstance(cell, MergedCell)
|
||||
else:
|
||||
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
|
||||
|
||||
@@ -450,6 +462,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
||||
)
|
||||
logger.debug("Absolute range arguments: %s", repr(arguments))
|
||||
|
||||
nb_contiguous_nothings = 0
|
||||
for rge in fmt.cells:
|
||||
for c in rge.cells:
|
||||
cell: Cell = worksheet.cell(row=c[0], column=c[1])
|
||||
@@ -458,7 +471,12 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
||||
, c[0]
|
||||
)
|
||||
)
|
||||
if condition(cell_value, *arguments):
|
||||
if cell_value is None:
|
||||
nb_contiguous_nothings += 1
|
||||
if nb_contiguous_nothings>50:
|
||||
break
|
||||
continue
|
||||
elif condition(cell_value, *arguments):
|
||||
logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
|
||||
active_cells.append(cell)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user