Calc eval fix (#273)
* ver Jun17th updating annotations * ver Jun17th corrected annotation of 1d17 added check for cell merge * ver Jun17th updated several annotations * ver Jun20th fixed set-up config of 2bd59342-0664-4ccb-ba87-79379096cc08 * fix: Enhance instructions in LibreOffice Calc examples for clarity and specificity, including details on using Pivot Tables, column placements, and revenue calculations. * ver Jun21st updating calc evals * ver Jun22nd fixed an impress task * ver Jun22ndv2 adjusted several calc tasks * Clean scaffolds * ver Jul18th added two try-excepts to handle possible formula parsing and calculation failures * ver Jul19th added support for cellIs and some other new types of conditional formatting for calc evaluation --------- Co-authored-by: BowenBryanWang <bryanwang.nlp@connect.hku.hk> Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
@@ -346,6 +346,8 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
|||||||
return cell["c"]["v"]
|
return cell["c"]["v"]
|
||||||
if cell["c"]["@t"] == "inlineStr":
|
if cell["c"]["@t"] == "inlineStr":
|
||||||
return cell["c"]["is"]["t"]
|
return cell["c"]["is"]["t"]
|
||||||
|
if cell["c"]["@t"] == "e":
|
||||||
|
return cell["c"]["v"]
|
||||||
except (KeyError, ValueError):
|
except (KeyError, ValueError):
|
||||||
return None
|
return None
|
||||||
# }}} read_cell_value #
|
# }}} read_cell_value #
|
||||||
@@ -409,6 +411,43 @@ def _read_cell_style(style_name: str, cell: Union[Cell, MergedCell], diff_style:
|
|||||||
else:
|
else:
|
||||||
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
|
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
|
||||||
|
|
||||||
|
def _process_xlsx_cf_operator(operator: str, value: Any, ref: List[Any]) -> bool:
    #  function _process_xlsx_cf_operator {{{ # 
    """Evaluate one conditional-formatting comparison operator on a cell value.

    Args:
        operator: name of the comparison operator. Handled here:
          "lessThanOrEqual", "lessThan", "equal", "greaterThanOrEqual",
          "notEqual", "greaterThan", "between" and "notBetween".
        value: the cell value under test.
        ref: reference value(s) to compare against. Single-operand operators
          use ref[0]; "between" / "notBetween" use min(ref) and max(ref) as
          inclusive bounds.

    Returns:
        The result of the comparison. False is returned (after logging) when
        the operator is not handled, when value and the reference(s) cannot
        be compared, or when ref holds too few elements.
    """

    # Operator names noted for this rule type:
    # "containsText", "lessThanOrEqual", "notBetween", "lessThan", "notContains", "beginsWith", "equal", "greaterThanOrEqual", "between", "endsWith", "notEqual", "greaterThan"
    # The text-oriented ones (containsText, notContains, beginsWith, endsWith)
    # are not handled below and fall through to the "not implemented" branch.
    try:
        if operator == "lessThanOrEqual":
            return value <= ref[0]
        if operator == "lessThan":
            return value < ref[0]
        if operator == "equal":
            return value == ref[0]
        if operator == "greaterThanOrEqual":
            return value >= ref[0]
        if operator == "notEqual":
            return value != ref[0]
        if operator == "greaterThan":
            return value > ref[0]

        if operator in ("between", "notBetween"):
            # min()/max() raise ValueError on an empty list; route that case
            # to the same "not enough elements" handling as a missing ref[0].
            if not ref:
                raise IndexError("empty reference list")
            small_one, large_one = min(ref), max(ref)
            if operator == "between":
                return value >= small_one and value <= large_one
            return value < small_one or value > large_one

        # Unsupported operator: report it and treat the condition as not
        # satisfied instead of falling through to an unbound result.
        logger.error("Not Implemented CondFormat Operator: {:}".format(operator))
        return False
    except TypeError:
        logger.exception("Unmatched type of %s and %s. Auto to False", repr(value), repr(ref))
        return False
    except IndexError:
        logger.exception("ref array doesn't have enough elements. Auto to False: %s", repr(ref))
        return False
    #  }}} function _process_xlsx_cf_operator # 
|
||||||
|
|
||||||
_absolute_range_pattern: Pattern[str] = re.compile(r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
|
_absolute_range_pattern: Pattern[str] = re.compile(r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
|
||||||
(?::
|
(?::
|
||||||
@@ -459,16 +498,23 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
|||||||
for fmt in conditional_formattings:
|
for fmt in conditional_formattings:
|
||||||
for r in fmt.rules:
|
for r in fmt.rules:
|
||||||
active_cells: List[Cell] = []
|
active_cells: List[Cell] = []
|
||||||
if r.type == "expression":
|
|
||||||
|
# Process CF Formulae {{{ #
|
||||||
|
formulae: List[Callable[[Any], Any]] = []
|
||||||
|
argument_lists: List[List[Any]] = []
|
||||||
|
has_error = False
|
||||||
|
for fml in r.formula:
|
||||||
try:
|
try:
|
||||||
condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile()
|
formula_func: Callable[[Any], Any] =\
|
||||||
|
formula_parser.ast("=" + fml)[1].compile()
|
||||||
|
logger.debug("CondFormat rule formula: %s", fml)
|
||||||
except:
|
except:
|
||||||
logger.exception("Formula parsing error: %s. Skipping.", repr(r.formula[0]))
|
logger.exception("Formula parsing error: %s. Skipping.", repr(fml))
|
||||||
continue
|
has_error = True
|
||||||
logger.debug("Expression condition: %s", r.formula[0])
|
break
|
||||||
|
|
||||||
arguments: List[Any] = []
|
arguments: List[Any] = []
|
||||||
absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(r.formula[0])
|
absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(fml)
|
||||||
for m in absolute_range_match:
|
for m in absolute_range_match:
|
||||||
logger.debug("Absolute ranges: %s", repr(m))
|
logger.debug("Absolute ranges: %s", repr(m))
|
||||||
if m[2] is None and m[3] is None:
|
if m[2] is None and m[3] is None:
|
||||||
@@ -484,31 +530,65 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
|||||||
)
|
)
|
||||||
logger.debug("Absolute range arguments: %s", repr(arguments))
|
logger.debug("Absolute range arguments: %s", repr(arguments))
|
||||||
|
|
||||||
nb_contiguous_nothings = 0
|
formulae.append(formula_func)
|
||||||
for rge in fmt.cells:
|
argument_lists.append(arguments)
|
||||||
for c in rge.cells:
|
|
||||||
cell: Cell = worksheet.cell(row=c[0], column=c[1])
|
if has_error:
|
||||||
cell_value = read_cell_value(book_name, sheet_name
|
continue
|
||||||
, coordinate="{:}{:d}".format(get_column_letter(c[1])
|
# }}} Process CF Formulae #
|
||||||
, c[0]
|
|
||||||
)
|
# Process Condition Accroding to Type {{{ #
|
||||||
)
|
if r.type in { "expression"
|
||||||
if cell_value is None:
|
, "containsText", "notContainsText"
|
||||||
nb_contiguous_nothings += 1
|
, "endsWith", "beginsWith"
|
||||||
if nb_contiguous_nothings>50:
|
, "containsErrors", "notContainsErrors"
|
||||||
break
|
}:
|
||||||
continue
|
condition: Callable[[Any], bool] = formulae[0]
|
||||||
else:
|
arguments: List[Any] = argument_lists[0]
|
||||||
try:
|
is_active: Callable[[Any], bool] = lambda v: condition(v, *arguments)
|
||||||
satisfies_condition: bool = condition(cell_value, *arguments)
|
elif r.type == "cellIs":
|
||||||
except:
|
operator: str = r.operator
|
||||||
logger.exception("Error in formula calculation with cell value %d", repr(cell_value))
|
try:
|
||||||
satisfies_condition = False
|
references: List[Any] = [fml() for fml in formulae]
|
||||||
if satisfies_condition:
|
except:
|
||||||
logger.debug("Active Cell %s(%s) for %s", repr(cell), repr(cell_value), r.formula[0])
|
logger.exception("Error occurs while calculating reference values for cellIs condition formatting.")
|
||||||
active_cells.append(cell)
|
continue
|
||||||
|
is_active: Callable[[Any], bool] =\
|
||||||
|
lambda v: _process_xlsx_cf_operator(operator, v, references)
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
|
#raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
|
||||||
|
# e.g., type=top10 (rank=number, percent=bool, bottom=bool)
|
||||||
|
# type=aboveAverage (equalAverage=bool, aboveAverage=bool)
|
||||||
|
# type=duplicateValues / type=uniqueValues
|
||||||
|
logger.exception("Not Implemented Condition Type: {:}".format(r.type))
|
||||||
|
# }}} Process Condition Accroding to Type #
|
||||||
|
|
||||||
|
|
||||||
|
# Test Each Cell {{{ #
|
||||||
|
nb_contiguous_nothings = 0
|
||||||
|
for rge in fmt.cells:
|
||||||
|
for c in rge.cells:
|
||||||
|
cell: Cell = worksheet.cell(row=c[0], column=c[1])
|
||||||
|
cell_value = read_cell_value(book_name, sheet_name
|
||||||
|
, coordinate="{:}{:d}".format(get_column_letter(c[1])
|
||||||
|
, c[0]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
if cell_value is None:
|
||||||
|
nb_contiguous_nothings += 1
|
||||||
|
if nb_contiguous_nothings>50:
|
||||||
|
break
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
satisfies_condition: bool = is_active(cell_value)
|
||||||
|
except:
|
||||||
|
logger.exception("Error in formula calculation with cell value %d", repr(cell_value))
|
||||||
|
satisfies_condition = False
|
||||||
|
if satisfies_condition:
|
||||||
|
logger.debug("Active Cell %s(%s) for %s", repr(cell), repr(cell_value), r.formula[0])
|
||||||
|
active_cells.append(cell)
|
||||||
|
# }}} Test Each Cell #
|
||||||
|
|
||||||
for c in active_cells:
|
for c in active_cells:
|
||||||
style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles]
|
style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles]
|
||||||
|
|||||||
Reference in New Issue
Block a user