Calc eval fix (#273)
* ver Jun17th updating annotations * ver Jun17th corrected annotation of 1d17 added check for cell merge * ver Jun17th updated several annotations * ver Jun20th fixed set-up config of 2bd59342-0664-4ccb-ba87-79379096cc08 * fix: Enhance instructions in LibreOffice Calc examples for clarity and specificity, including details on using Pivot Tables, column placements, and revenue calculations. * ver Jun21st updating calc evals * ver Jun22nd fixed an impress task * ver Jun22ndv2 adjusted several calc tasks * Clean scaffolds * ver Jul18th added two try-excepts to handle possible formula parsing and calculation failures * ver Jul19th added support for cellIs and some other new types of conditional formatting for calc evaluation --------- Co-authored-by: BowenBryanWang <bryanwang.nlp@connect.hku.hk> Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
@@ -346,6 +346,8 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
||||
return cell["c"]["v"]
|
||||
if cell["c"]["@t"] == "inlineStr":
|
||||
return cell["c"]["is"]["t"]
|
||||
if cell["c"]["@t"] == "e":
|
||||
return cell["c"]["v"]
|
||||
except (KeyError, ValueError):
|
||||
return None
|
||||
# }}} read_cell_value #
|
||||
@@ -409,6 +411,43 @@ def _read_cell_style(style_name: str, cell: Union[Cell, MergedCell], diff_style:
|
||||
else:
|
||||
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
|
||||
|
||||
def _process_xlsx_cf_operator(operator: str, value: Any, ref: List[Any]) -> bool:
|
||||
# function _process_xlsx_cf_operator {{{ #
|
||||
# "containsText", "lessThanOrEqual", "notBetween", "lessThan", "notContains", "beginsWith", "equal", "greaterThanOrEqual", "between", "endsWith", "notEqual", "greaterThan"
|
||||
try:
|
||||
if operator=="lessThanOrEqual":
|
||||
result: bool = value<=ref[0]
|
||||
elif operator=="lessThan":
|
||||
result: bool = value<ref[0]
|
||||
elif operator=="equal":
|
||||
result: bool = value==ref[0]
|
||||
elif operator=="greaterThanOrEqual":
|
||||
result: bool = value>=ref[0]
|
||||
elif operator=="notEqual":
|
||||
result: bool = value!=ref[0]
|
||||
elif operator=="greaterThan":
|
||||
result: bool = value>ref[0]
|
||||
elif operator=="between":
|
||||
small_one: float
|
||||
large_one: float
|
||||
small_one, large_one = min(ref), max(ref)
|
||||
result: bool = value>=small_one and value<=large_one
|
||||
elif operator=="notBetween":
|
||||
small_one: float
|
||||
large_one: float
|
||||
small_one, large_one = min(ref), max(ref)
|
||||
result: bool = value<small_one or value>large_one
|
||||
else:
|
||||
#raise NotImplementedError("Not Implemented CondFormat Operator: {:}".format(operator))
|
||||
logger.exception("Not Implemented CondFormat Operator: {:}".format(operator))
|
||||
return result
|
||||
except TypeError:
|
||||
logger.exception("Unmatched type of %s and %s. Auto to False", repr(value), repr(ref))
|
||||
return False
|
||||
except IndexError:
|
||||
logger.exception("ref array doesn't have enough elements. Auto to False: %s", repr(ref))
|
||||
return False
|
||||
# }}} function _process_xlsx_cf_operator #
|
||||
|
||||
_absolute_range_pattern: Pattern[str] = re.compile(r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
|
||||
(?::
|
||||
@@ -459,16 +498,23 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
||||
for fmt in conditional_formattings:
|
||||
for r in fmt.rules:
|
||||
active_cells: List[Cell] = []
|
||||
if r.type == "expression":
|
||||
|
||||
# Process CF Formulae {{{ #
|
||||
formulae: List[Callable[[Any], Any]] = []
|
||||
argument_lists: List[List[Any]] = []
|
||||
has_error = False
|
||||
for fml in r.formula:
|
||||
try:
|
||||
condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile()
|
||||
formula_func: Callable[[Any], Any] =\
|
||||
formula_parser.ast("=" + fml)[1].compile()
|
||||
logger.debug("CondFormat rule formula: %s", fml)
|
||||
except:
|
||||
logger.exception("Formula parsing error: %s. Skipping.", repr(r.formula[0]))
|
||||
continue
|
||||
logger.debug("Expression condition: %s", r.formula[0])
|
||||
logger.exception("Formula parsing error: %s. Skipping.", repr(fml))
|
||||
has_error = True
|
||||
break
|
||||
|
||||
arguments: List[Any] = []
|
||||
absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(r.formula[0])
|
||||
absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(fml)
|
||||
for m in absolute_range_match:
|
||||
logger.debug("Absolute ranges: %s", repr(m))
|
||||
if m[2] is None and m[3] is None:
|
||||
@@ -484,31 +530,65 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
|
||||
)
|
||||
logger.debug("Absolute range arguments: %s", repr(arguments))
|
||||
|
||||
nb_contiguous_nothings = 0
|
||||
for rge in fmt.cells:
|
||||
for c in rge.cells:
|
||||
cell: Cell = worksheet.cell(row=c[0], column=c[1])
|
||||
cell_value = read_cell_value(book_name, sheet_name
|
||||
, coordinate="{:}{:d}".format(get_column_letter(c[1])
|
||||
, c[0]
|
||||
)
|
||||
)
|
||||
if cell_value is None:
|
||||
nb_contiguous_nothings += 1
|
||||
if nb_contiguous_nothings>50:
|
||||
break
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
satisfies_condition: bool = condition(cell_value, *arguments)
|
||||
except:
|
||||
logger.exception("Error in formula calculation with cell value %d", repr(cell_value))
|
||||
satisfies_condition = False
|
||||
if satisfies_condition:
|
||||
logger.debug("Active Cell %s(%s) for %s", repr(cell), repr(cell_value), r.formula[0])
|
||||
active_cells.append(cell)
|
||||
formulae.append(formula_func)
|
||||
argument_lists.append(arguments)
|
||||
|
||||
if has_error:
|
||||
continue
|
||||
# }}} Process CF Formulae #
|
||||
|
||||
# Process Condition According to Type {{{ #
|
||||
if r.type in { "expression"
|
||||
, "containsText", "notContainsText"
|
||||
, "endsWith", "beginsWith"
|
||||
, "containsErrors", "notContainsErrors"
|
||||
}:
|
||||
condition: Callable[[Any], bool] = formulae[0]
|
||||
arguments: List[Any] = argument_lists[0]
|
||||
is_active: Callable[[Any], bool] = lambda v: condition(v, *arguments)
|
||||
elif r.type == "cellIs":
|
||||
operator: str = r.operator
|
||||
try:
|
||||
references: List[Any] = [fml() for fml in formulae]
|
||||
except:
|
||||
logger.exception("Error occurs while calculating reference values for cellIs condition formatting.")
|
||||
continue
|
||||
is_active: Callable[[Any], bool] =\
|
||||
lambda v: _process_xlsx_cf_operator(operator, v, references)
|
||||
else:
|
||||
raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
|
||||
#raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
|
||||
# e.g., type=top10 (rank=number, percent=bool, bottom=bool)
|
||||
# type=aboveAverage (equalAverage=bool, aboveAverage=bool)
|
||||
# type=duplicateValues / type=uniqueValues
|
||||
logger.exception("Not Implemented Condition Type: {:}".format(r.type))
|
||||
# }}} Process Condition According to Type #
|
||||
|
||||
|
||||
# Test Each Cell {{{ #
|
||||
nb_contiguous_nothings = 0
|
||||
for rge in fmt.cells:
|
||||
for c in rge.cells:
|
||||
cell: Cell = worksheet.cell(row=c[0], column=c[1])
|
||||
cell_value = read_cell_value(book_name, sheet_name
|
||||
, coordinate="{:}{:d}".format(get_column_letter(c[1])
|
||||
, c[0]
|
||||
)
|
||||
)
|
||||
if cell_value is None:
|
||||
nb_contiguous_nothings += 1
|
||||
if nb_contiguous_nothings>50:
|
||||
break
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
satisfies_condition: bool = is_active(cell_value)
|
||||
except:
|
||||
logger.exception("Error in formula calculation with cell value %d", repr(cell_value))
|
||||
satisfies_condition = False
|
||||
if satisfies_condition:
|
||||
logger.debug("Active Cell %s(%s) for %s", repr(cell), repr(cell_value), r.formula[0])
|
||||
active_cells.append(cell)
|
||||
# }}} Test Each Cell #
|
||||
|
||||
for c in active_cells:
|
||||
style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles]
|
||||
|
||||
Reference in New Issue
Block a user