Calc eval fix (#273)

* ver Jun17th

updating annotations

* ver Jun17th

corrected annotation of 1d17
added check for cell merge

* ver Jun17th

updated several annotations

* ver Jun20th

fixed set-up config of 2bd59342-0664-4ccb-ba87-79379096cc08

* fix: Enhance instructions in LibreOffice Calc examples for clarity and specificity, including details on using Pivot Tables, column placements, and revenue calculations.

* ver Jun21st

updating calc evals

* ver Jun22nd

fixed an impress task

* ver Jun22ndv2

adjusted several calc tasks

* Clean scaffolds

* ver Jul18th

added two try-excepts to handle possible formula parsing and calculation
failures

* ver Jul19th

added support for cellIs and some other new types of conditional
formatting for calc evaluation

---------

Co-authored-by: BowenBryanWang <bryanwang.nlp@connect.hku.hk>
Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
Danyang Zhang
2025-07-19 17:15:40 +08:00
committed by GitHub
parent c6c62c52d7
commit bec7129fff

View File

@@ -346,6 +346,8 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
return cell["c"]["v"]
if cell["c"]["@t"] == "inlineStr":
return cell["c"]["is"]["t"]
if cell["c"]["@t"] == "e":
return cell["c"]["v"]
except (KeyError, ValueError):
return None
# }}} read_cell_value #
@@ -409,6 +411,43 @@ def _read_cell_style(style_name: str, cell: Union[Cell, MergedCell], diff_style:
else:
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
def _process_xlsx_cf_operator(operator: str, value: Any, ref: List[Any]) -> bool:
    #  function _process_xlsx_cf_operator {{{ #
    """Evaluate one conditional-formatting comparison operator.

    Args:
        operator: operator name as stored on the conditional-formatting rule.
            Implemented here: "lessThanOrEqual", "lessThan", "equal",
            "greaterThanOrEqual", "notEqual", "greaterThan", "between",
            "notBetween".
        value: the cell value being tested.
        ref: reference values the cell value is compared against. Single
            operand operators use ref[0]; "between"/"notBetween" use the
            minimum and maximum of the whole list as the (inclusive) bounds.

    Returns:
        The result of the comparison. False is returned (and the problem is
        logged) when the operator is not implemented, when `value` and the
        references cannot be compared, or when `ref` is too short.
    """
    # Other operator names that may appear on rules but are NOT handled here:
    # "containsText", "notContains", "beginsWith", "endsWith"
    try:
        if operator == "lessThanOrEqual":
            return value <= ref[0]
        if operator == "lessThan":
            return value < ref[0]
        if operator == "equal":
            return value == ref[0]
        if operator == "greaterThanOrEqual":
            return value >= ref[0]
        if operator == "notEqual":
            return value != ref[0]
        if operator == "greaterThan":
            return value > ref[0]
        if operator in ("between", "notBetween"):
            if not ref:
                # min()/max() raise ValueError on an empty list; report it the
                # same way as a missing ref[0] instead of letting it escape.
                raise IndexError("empty reference list")
            # The two bounds may be given in either order.
            small_one, large_one = min(ref), max(ref)
            if operator == "between":
                return value >= small_one and value <= large_one
            return value < small_one or value > large_one

        # Unknown operator: there is no active exception here, so use
        # logger.error (not logger.exception), and return an explicit False
        # rather than falling through to an unbound result variable.
        logger.error("Not Implemented CondFormat Operator: %s", operator)
        return False
    except TypeError:
        logger.exception("Unmatched type of %s and %s. Auto to False", repr(value), repr(ref))
        return False
    except IndexError:
        logger.exception("ref array doesn't have enough elements. Auto to False: %s", repr(ref))
        return False
    #  }}} function _process_xlsx_cf_operator #
_absolute_range_pattern: Pattern[str] = re.compile(r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
(?::
@@ -459,16 +498,23 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
for fmt in conditional_formattings:
for r in fmt.rules:
active_cells: List[Cell] = []
if r.type == "expression":
# Process CF Formulae {{{ #
formulae: List[Callable[[Any], Any]] = []
argument_lists: List[List[Any]] = []
has_error = False
for fml in r.formula:
try:
condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile()
formula_func: Callable[[Any], Any] =\
formula_parser.ast("=" + fml)[1].compile()
logger.debug("CondFormat rule formula: %s", fml)
except:
logger.exception("Formula parsing error: %s. Skipping.", repr(r.formula[0]))
continue
logger.debug("Expression condition: %s", r.formula[0])
logger.exception("Formula parsing error: %s. Skipping.", repr(fml))
has_error = True
break
arguments: List[Any] = []
absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(r.formula[0])
absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(fml)
for m in absolute_range_match:
logger.debug("Absolute ranges: %s", repr(m))
if m[2] is None and m[3] is None:
@@ -484,31 +530,65 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
)
logger.debug("Absolute range arguments: %s", repr(arguments))
nb_contiguous_nothings = 0
for rge in fmt.cells:
for c in rge.cells:
cell: Cell = worksheet.cell(row=c[0], column=c[1])
cell_value = read_cell_value(book_name, sheet_name
, coordinate="{:}{:d}".format(get_column_letter(c[1])
, c[0]
)
)
if cell_value is None:
nb_contiguous_nothings += 1
if nb_contiguous_nothings>50:
break
continue
else:
try:
satisfies_condition: bool = condition(cell_value, *arguments)
except:
logger.exception("Error in formula calculation with cell value %d", repr(cell_value))
satisfies_condition = False
if satisfies_condition:
logger.debug("Active Cell %s(%s) for %s", repr(cell), repr(cell_value), r.formula[0])
active_cells.append(cell)
formulae.append(formula_func)
argument_lists.append(arguments)
if has_error:
continue
# }}} Process CF Formulae #
# Process Condition According to Type {{{ #
if r.type in { "expression"
, "containsText", "notContainsText"
, "endsWith", "beginsWith"
, "containsErrors", "notContainsErrors"
}:
condition: Callable[[Any], bool] = formulae[0]
arguments: List[Any] = argument_lists[0]
is_active: Callable[[Any], bool] = lambda v: condition(v, *arguments)
elif r.type == "cellIs":
operator: str = r.operator
try:
references: List[Any] = [fml() for fml in formulae]
except:
logger.exception("Error occurs while calculating reference values for cellIs condition formatting.")
continue
is_active: Callable[[Any], bool] =\
lambda v: _process_xlsx_cf_operator(operator, v, references)
else:
raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
#raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
# e.g., type=top10 (rank=number, percent=bool, bottom=bool)
# type=aboveAverage (equalAverage=bool, aboveAverage=bool)
# type=duplicateValues / type=uniqueValues
logger.exception("Not Implemented Condition Type: {:}".format(r.type))
# }}} Process Condition According to Type #
# Test Each Cell {{{ #
nb_contiguous_nothings = 0
for rge in fmt.cells:
for c in rge.cells:
cell: Cell = worksheet.cell(row=c[0], column=c[1])
cell_value = read_cell_value(book_name, sheet_name
, coordinate="{:}{:d}".format(get_column_letter(c[1])
, c[0]
)
)
if cell_value is None:
nb_contiguous_nothings += 1
if nb_contiguous_nothings>50:
break
continue
else:
try:
satisfies_condition: bool = is_active(cell_value)
except:
logger.exception("Error in formula calculation with cell value %d", repr(cell_value))
satisfies_condition = False
if satisfies_condition:
logger.debug("Active Cell %s(%s) for %s", repr(cell), repr(cell_value), r.formula[0])
active_cells.append(cell)
# }}} Test Each Cell #
for c in active_cells:
style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles]