Calc eval fix (#225)

* ver Jun17th

updating annotations

* ver Jun17th

corrected annotation of 1d17
added check for cell merge

* ver Jun17th

updated several annotations

* ver Jun20th

fixed set-up config of 2bd59342-0664-4ccb-ba87-79379096cc08

* fix: Enhance instructions in LibreOffice Calc examples for clarity and specificity, including details on using Pivot Tables, column placements, and revenue calculations.

* ver Jun21st

updating calc evals

* ver Jun22nd

fixed an impress task

* ver Jun22ndv2

adjusted several calc tasks

* Clean scaffolds

---------

Co-authored-by: BowenBryanWang <bryanwang.nlp@connect.hku.hk>
Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
Danyang Zhang
2025-06-30 18:23:09 +08:00
committed by GitHub
parent 30138c5db1
commit d4273d992e
25 changed files with 224 additions and 73 deletions

View File

@@ -1,12 +1,12 @@
import builtins
import datetime
#import datetime
import functools
import itertools
import logging
import operator
import re
import zipfile
import pandas as pd
#import pandas as pd
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
from typing import Dict, List, Set, Match, Tuple, Pattern
from urllib.parse import urlparse, urlunparse
@@ -17,7 +17,7 @@ import lxml.etree
import xmltodict
from lxml.etree import _Element
from openpyxl import Workbook
from openpyxl.cell.cell import Cell
from openpyxl.cell.cell import Cell, MergedCell
from openpyxl.chart._chart import ChartBase
from openpyxl.formatting.formatting import ConditionalFormattingList
from openpyxl.pivot.cache import CacheSource as PivotCacheSource
@@ -292,7 +292,9 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
for elm in str_elements
]
except:
#logger.exception("Read shared strings error: %s", xlsx_file)
logger.debug("Read shared strings error: %s", xlsx_file)
shared_strs: List[str] = []
with z_f.open("xl/workbook.xml") as f:
workbook_database: _Element = lxml.etree.fromstring(f.read())
@@ -315,6 +317,7 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
, process_namespaces=True
, namespaces=_xlsx_ns_imapping
)
logger.debug("%s.shared_strings: %s", xlsx_file, repr(shared_strs))
logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))
try:
if "@t" not in cell["c"] or cell["c"]["@t"] == "n":
@@ -323,6 +326,8 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
return shared_strs[int(cell["c"]["v"])]
if cell["c"]["@t"] == "str":
return cell["c"]["v"]
if cell["c"]["@t"] == "inlineStr":
return cell["c"]["is"]["t"]
except (KeyError, ValueError):
return None
# }}} read_cell_value #
@@ -338,10 +343,11 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# font_underline - "single" | "double" | "singleAccounting" | "doubleAccounting"
# font_size - float
# fill_type - "patternFill" | "gradientFill"
# bgcolor - in aRGB, e.g., FFFF0000 is red
# fgcolor - in aRGB, e.g., FF00FFFF is yellow
# bgcolor - in aRGB, e.g., FFFF0000 is red; NOTE: in xlsx this property appears to be ambiguous with fgcolor
# fgcolor - in aRGB, e.g., FFFFFF00 is yellow # Deprecated
# hyperlink - str
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
# merge - bool, True if the cell is in a merged range and is not the first cell in the merged range
def _read_cell_style(style_name: str, cell: Union[Cell, MergedCell], diff_style: Optional[DifferentialStyle] = None) -> Any:
if style_name == "number_format":
return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode) \
if cell.value is not None and cell.data_type == "n" else None
@@ -364,18 +370,24 @@ def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[Different
return (diff_style or cell).fill.tagname
except:
return None
elif style_name == "bgcolor":
elif style_name == "bgcolor" or style_name == "fgcolor":
try:
return (diff_style or cell).fill.bgColor.rgb
except:
return None
elif style_name == "fgcolor":
try:
return (diff_style or cell).fill.fgColor.rgb
#return (diff_style or cell).fill.bgColor.rgb
if diff_style is not None:
return diff_style.fill.bgColor.rgb
else:
return cell.fill.fgColor.rgb
except:
return None
#elif style_name == "fgcolor":
#try:
#return (diff_style or cell).fill.fgColor.rgb
#except:
#return None
elif style_name == "hyperlink":
return cell.hyperlink or "" if cell.value is not None else None
elif style_name == "merge":
return isinstance(cell, MergedCell)
else:
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
@@ -450,6 +462,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
)
logger.debug("Absolute range arguments: %s", repr(arguments))
nb_contiguous_nothings = 0
for rge in fmt.cells:
for c in rge.cells:
cell: Cell = worksheet.cell(row=c[0], column=c[1])
@@ -458,7 +471,12 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt
, c[0]
)
)
if condition(cell_value, *arguments):
if cell_value is None:
nb_contiguous_nothings += 1
if nb_contiguous_nothings>50:
break
continue
elif condition(cell_value, *arguments):
logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
active_cells.append(cell)
else: