Merge branch 'zdy'

This commit is contained in:
David Chang
2024-01-22 14:37:13 +08:00
23 changed files with 573 additions and 400 deletions

View File

@@ -7,6 +7,27 @@
pip install python-docx
```
## LibreOffice Calc
### Required Libraries
```
openpyxl
pandas
lxml
xmltodict
```
### How to Generate CSV from XLSX
```sh
libreoffice --convert-to "csv:Text - txt - csv (StarCalc):44,34,UTF8,,,,false,true,true,false,false,1" --outdir /home/user /home/user/abc.xlsx
```
This command generates `abc-Sheet1.csv` under `/home/user`. The last `1` in the conversion options is the number of the sheet to export (counting from 1). See [CSV Filter Options](https://help.libreoffice.org/latest/en-US/text/shared/guide/csv_params.html) for the detailed usage of these options.
Refer to `libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json` for an example.
## Chrome
### Starting Chrome with Remote Debugging for Python

View File

@@ -6,7 +6,7 @@ from .docs import is_first_line_centered, check_file_exists, compare_contains_im
from .general import exact_match, fuzzy_match, check_csv, check_accessibility_tree, check_list
from .libreoffice import check_libre_locale
from .pdf import check_pdf_pages
from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom, check_data_validations
#from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom, check_data_validations
from .table import compare_table
from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \
compare_videos

View File

@@ -1,10 +1,11 @@
import logging
import operator
from numbers import Number
from typing import Any, Union
from typing import Dict, List
from typing import Any, Union, cast, Callable
from typing import Dict, List, Tuple
import os.path
import itertools
import functools
import openpyxl
import pandas as pd
@@ -17,314 +18,273 @@ from .utils import load_charts, load_sparklines, _match_value_to_rule
logger = logging.getLogger("desktopenv.metric.table")
BOOK = Union[pd.ExcelFile, Workbook, str]
def _parse_sheet_idx(sheet_idx: Union[int, str],
                     result: BOOK, expected: BOOK,
                     result_sheet_names: List[str],
                     expected_sheet_names: List[str],
                     ) -> Tuple[BOOK, str]:
    #  function _parse_sheet_idx {{{ # 
    """
    Resolves a sheet index into the workbook it refers to and a sheet name.

    Supported forms of `sheet_idx`:
      * int n     - n-th sheet of the result workbook
      * "RI<n>"   - n-th sheet of the result workbook
      * "RN<name>" - sheet called <name> in the result workbook
      * "EI<n>"   - n-th sheet of the expected workbook
      * "EN<name>" - sheet called <name> in the expected workbook

    Args:
        sheet_idx (Union[int, str]): the sheet index as described above
        result (BOOK): the result workbook, returned untouched for R* forms
        expected (BOOK): the expected workbook, returned untouched for E*
          forms
        result_sheet_names (List[str]): sheet names of the result workbook,
          used to translate positional indices
        expected_sheet_names (List[str]): sheet names of the expected
          workbook, used to translate positional indices

    Returns:
        Tuple[BOOK, str]: the selected workbook and the sheet name

    Raises:
        ValueError: if a str `sheet_idx` carries none of the known prefixes
    """

    # a bare integer is a shorthand for "RI<n>"
    if isinstance(sheet_idx, int):
        return result, result_sheet_names[sheet_idx]

    if sheet_idx.startswith("RI"):
        return result, result_sheet_names[int(sheet_idx[2:])]
    if sheet_idx.startswith("RN"):
        # names are taken verbatim, they are not checked against the book
        return result, sheet_idx[2:]
    if sheet_idx.startswith("EI"):
        return expected, expected_sheet_names[int(sheet_idx[2:])]
    if sheet_idx.startswith("EN"):
        return expected, sheet_idx[2:]

    logger.error("Unrecognized sheet index")
    raise ValueError("Unrecognized sheet index")
    #  }}} function _parse_sheet_idx # 
def compare_table(actual: str, expected: str, **options) -> float:
SHEET = Union[pd.DataFrame, Worksheet, List[str]]
def _load_sheet(book: BOOK, index: str) -> SHEET:
    #  function _load_sheet {{{ # 
    """
    Loads a single sheet; the representation depends on the type of `book`.

    Args:
        book (BOOK): one of
          * str: path to an xlsx file. The sheet is NOT read from the xlsx
            itself but from the CSV export expected to sit next to it at
            "<path without extension>-<index>.csv"
          * pd.ExcelFile: the sheet is read through `pd.read_excel`
          * Workbook: the sheet is fetched by name from the openpyxl workbook
        index (str): sheet name

    Returns:
        SHEET:
          * List[str] for a str `book`: the stripped lines of the CSV with
            the trailing empty lines removed. NOTE that the lines are in
            REVERSED file order (last line first); this is harmless for the
            equality checks the list is meant for.
          * pd.DataFrame for a pd.ExcelFile
          * Worksheet for a Workbook

    Raises:
        NotImplementedError: if `book` is of none of the types above
    """

    if isinstance(book, str):
        csv_name: str = "{:}-{:}.csv".format(os.path.splitext(book)[0], index)

        # The CSV is exported by LibreOffice with an explicit UTF-8 character
        # set, so decode it as UTF-8 instead of relying on the locale default.
        with open(csv_name, encoding="utf-8") as f:
            stripped_lines = (l.strip() for l in reversed(f.read().splitlines()))
            # walking backwards, dropwhile removes the trailing empty lines
            csv_lines: List[str] = list(
                itertools.dropwhile(lambda l: len(l) == 0, stripped_lines)
            )
        return csv_lines

    if isinstance(book, pd.ExcelFile):
        return pd.read_excel(book, index)

    if isinstance(book, Workbook):
        return book[index]

    logger.error("Not supported workbook format")
    raise NotImplementedError("Not supported workbook format")
    #  }}} function _load_sheet # 
def compare_table(result: str, expected: str, **options) -> float:
    #  function compare_table {{{ # 
    """
    Scores a result workbook against a golden workbook through a list of
    rules. The rules are evaluated in order and the evaluation stops at the
    first rule that fails.

    Sheets are addressed in the rules by "sheet indices" (see
    `_parse_sheet_idx`): 0 == "RI0" == "RNSheet1" address the result
    workbook, "EI0" == "ENSheet1" address the expected workbook.

    Args:
        result (str): path to result xlsx
        expected (str): path to golden xlsx
        rules (List[Dict[str, Any]]): given through `options`; list of dict
          like
          {
            "type": str,
            <str as parameters>: anything
          }
          as sequential rules. Supported types and their parameters:
          * "sheet_name": no parameters; the sheet name lists of both
            workbooks should be equal
          * "sheet_data": sheet_idx0, sheet_idx1; compares internal cell
            values as pandas DataFrames
          * "sheet_print": sheet_idx0, sheet_idx1, optional ignore_case
            (defaults to False); compares printed values, read from the CSV
            export "<xlsx path without extension>-<sheet name>.csv"
          * "sparkline": sheet_idx0, sheet_idx1
          * "chart": sheet_idx0, sheet_idx1, chart_props (list of str, see
            utils.load_charts)
          * "number_format": sheet_idx0, sheet_idx1; compares the lower-cased
            number formats of the numeric cells
          * "freeze": sheet_idx0, sheet_idx1; compares freeze_panes
          * "zoom": sheet_idx, method, ref; checks the zoom scale (100 if
            unset) through `_match_value_to_rule`
          * "data_validation": sheet_idx, dv_props; dv_props is a list of
            dict like {attribute: {"method": str, "ref": anything}}; every
            data validation of the sheet should fully match at least one of
            these dicts. Available attributes: ranges, type, formula1,
            formula2, operator, allowBlank, showDropDown, showInputMessage,
            showErrorMessage, error, errorTitle, errorStyle, prompt,
            promptTitle, imeMode

    Returns:
        float: the score, 1. if all the rules pass, else 0.; 0. as well if
          `result` is None

    Raises:
        KeyError: if no "rules" option is given
        NotImplementedError: if a rule has an unknown "type"
    """

    # no result file at all, consistent with the other metrics of the module
    if result is None:
        return 0.

    # Both an openpyxl and a pandas view are kept for each workbook: pandas
    # serves the value comparison, openpyxl serves the formatting checks.
    xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
    pdworkbookr = pd.ExcelFile(xlworkbookr, engine="openpyxl")
    worksheetr_names: List[str] = pdworkbookr.sheet_names

    xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
    pdworkbooke = pd.ExcelFile(xlworkbooke, engine="openpyxl")
    worksheete_names: List[str] = pdworkbooke.sheet_names

    parse_idx: Callable[[Union[str, int], BOOK, BOOK], Tuple[BOOK, str]] = \
        functools.partial(_parse_sheet_idx,
                          result_sheet_names=worksheetr_names,
                          expected_sheet_names=worksheete_names,
                          )

    passes = True
    for r in options["rules"]:
        if r["type"] == "sheet_name":
            #  Compare Sheet Names {{{ # 
            metric: bool = worksheetr_names == worksheete_names
            logger.debug("Assertion: %s.sheet_names == %s.sheet_names - %s", result, expected, metric)
            #  }}} Compare Sheet Names # 

        elif r["type"] == "sheet_data":
            #  Compare Sheet Data by Internal Value {{{ # 
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0

            sheet1: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx0"], pdworkbookr, pdworkbooke))
            sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke))
            metric: bool = sheet1.equals(sheet2)
            logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            #  }}} Compare Sheet Data by Internal Value # 

        elif r["type"] == "sheet_print":
            #  Compare Sheet Data by Printed Value {{{ # 
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            # ignore_case: optional, defaults to False

            # the xlsx paths are passed as books so that the CSV exports are read
            sheet1: List[str] = _load_sheet(*parse_idx(r["sheet_idx0"], result, expected))
            sheet2: List[str] = _load_sheet(*parse_idx(r["sheet_idx1"], result, expected))
            if r.get("ignore_case", False):
                sheet1 = [l.lower() for l in sheet1]
                sheet2 = [l.lower() for l in sheet2]
            metric: bool = sheet1 == sheet2
            logger.debug("Assertion: %s =p= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            #  }}} Compare Sheet Data by Printed Value # 

        elif r["type"] == "sparkline":
            #  Compare Sparklines {{{ # 
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0

            sparkline1: Dict[str, str] = load_sparklines(*parse_idx(r["sheet_idx0"], result, expected))
            sparkline2: Dict[str, str] = load_sparklines(*parse_idx(r["sheet_idx1"], result, expected))
            metric: bool = sparkline1 == sparkline2
            logger.debug("Assertion: %s.sp == %s.sp - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            #  }}} Compare Sparklines # 

        elif r["type"] == "chart":
            #  Compare Charts {{{ # 
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0
            # chart_props: list of str, see utils.load_charts

            charts1: Dict[str, Any] = load_charts(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
            charts2: Dict[str, Any] = load_charts(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
            metric: bool = charts1 == charts2
            logger.debug("Assertion: %s[chart] == %s[chart] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            #  }}} Compare Charts # 

        elif r["type"] == "number_format":
            #  Compare Number Formats {{{ # 
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0

            sheet1: Worksheet = _load_sheet(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke))
            sheet2: Worksheet = _load_sheet(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke))
            # only the numeric cells ("n") are concerned
            number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.data_type == "n"]
            number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.data_type == "n"]
            metric: bool = number_formats1 == number_formats2
            logger.debug("Assertion: %s.nf == %s.nf - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
            #  }}} Compare Number Formats # 

        elif r["type"] == "freeze":
            #  Compare Freezing {{{ # 
            # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # sheet_idx1: as sheet_idx0

            sheet1: Worksheet = _load_sheet(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke))
            sheet2: Worksheet = _load_sheet(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke))
            metric: bool = sheet1.freeze_panes == sheet2.freeze_panes
            logger.debug("Assertion: %s.freeze(%s) == %s.freeze(%s) - %s",
                         r["sheet_idx0"], sheet1.freeze_panes,
                         r["sheet_idx1"], sheet2.freeze_panes,
                         metric,
                         )
            #  }}} Compare Freezing # 

        elif r["type"] == "zoom":
            #  Check Zooming {{{ # 
            # sheet_idx: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # method: str
            # ref: value

            sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
            # an unset zoom scale is regarded as 100
            zoom_scale: Number = sheet.sheet_view.zoomScale or 100.
            metric: bool = _match_value_to_rule(zoom_scale, r)
            logger.debug("Assertion: %s.zoom(%.1f) %s %.1f - %s", r["sheet_idx"], zoom_scale, r["method"], r["ref"], metric)
            #  }}} Check Zooming # 

        elif r["type"] == "data_validation":
            #  Check Data Validation {{{ # 
            # sheet_idx: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
            # dv_props: list of dict like {attribute: {"method": str, "ref": anything}}
            #   available attributes:
            #     * ranges
            #     * type
            #     * formula1
            #     * formula2
            #     * operator
            #     * allowBlank
            #     * showDropDown
            #     * showInputMessage
            #     * showErrorMessage
            #     * error
            #     * errorTitle
            #     * errorStyle
            #     * prompt
            #     * promptTitle
            #     * imeMode

            sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
            data_validators: List[DataValidation] = sheet.data_validations.dataValidation

            # Every validator should fully match at least one property set.
            # The loop variables must not reuse the name `r`: the rule is
            # still needed for the log below and for the next validator.
            metric: bool = all(
                any(
                    all(_match_value_to_rule(getattr(dat_vldt, attrbt), mr)
                        for attrbt, mr in prpt.items()
                        )
                    for prpt in r["dv_props"]
                )
                for dat_vldt in data_validators
            )
            logger.debug("Assertion: %s.data_validation - %s", r["sheet_idx"], metric)
            #  }}} Check Data Validation # 

        else:
            raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))

        passes = passes and metric
        if not passes:
            break

    return float(passes)
    #  }}} function compare_table # 
if __name__ == '__main__':
    # Manual smoke test of compare_table against local sample workbooks.
    # The relative paths below only resolve inside the author's working tree.
    path1 = "../../任务数据/LibreOffice Calc/Freeze_row_column.xlsx"
    path2 = "../../任务数据/LibreOffice Calc/Freeze_row_column_gold.xlsx"
    rules = [
        {
            "type": "sheet_data",
            "sheet_idx0": 0,
            "sheet_idx1": "EI0",
        },
        {
            "type": "freeze",
            "sheet_idx0": 0,
            "sheet_idx1": "EI0",
        },
    ]

    # result workbook against the golden one
    print(compare_table(path1, path2, rules=rules))
    # the golden workbook against itself
    print(compare_table(path2, path2, rules=rules))

View File

@@ -22,21 +22,21 @@ V = TypeVar("Value")
logger = logging.getLogger("desktopenv.metrics.utils")
_xlsx_namespaces = [("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
_xlsx_namespaces = [ ("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
, ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
_xlsx_ns_mapping = dict(_xlsx_namespaces)
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
# print(_sparklines_selector.css)
def load_sparklines(xlsx_file: str) -> Dict[str, str]:
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
"""
This function modifies data_frame in-place
Args:
xlsx_file (str): path to xlsx
sheet_name (str): sheet name
Returns:
List[Dict[str, str]]: sparkline definitions in form of
@@ -47,9 +47,13 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
# read xlsx
with zipfile.ZipFile(xlsx_file, "r") as z_f:
with z_f.open("xl/worksheets/sheet1.xml") as f:
sheet1: _Element = lxml.etree.fromstring(f.read())
sparklines: List[_Element] = _sparklines_selector(sheet1)
with z_f.open("xl/workbook.xml") as f:
workbook_database: _Element = lxml.etree.fromstring(f.read())
sheets: List[_Element] = _sheet_name_selector(workbook_database)
sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
sheet: _Element = lxml.etree.fromstring(f.read())
sparklines: List[_Element] = _sparklines_selector(sheet)
sparklines_dict: Dict[str, str] = {}
for sp_l in sparklines:
@@ -70,10 +74,11 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
# type: "scatterChart" | "lineChart" | "barChart"
# direction: "bar" (hori) | "col" (vert)
# xtitle, ytitle, ztitle: str
def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
"""
Args:
xlsx_file (Workbook): concerned excel book
sheet_name (str): sheet name
options (Dict[str, List[str]]): dict like {"chart_props": list of str}
giving the concerned chart properties
@@ -82,7 +87,7 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
"""
# workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
worksheet: Worksheet = xlsx_file.active
worksheet: Worksheet = xlsx_file[sheet_name]
charts: List[ChartBase] = worksheet._charts
chart_set: Dict[str, Any] = {}

View File

@@ -63,7 +63,7 @@
"command": [
"libreoffice",
"--convert-to",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
"--outdir",
"/home/user",
"/home/user/Customers_New_7digit_Id.xlsx"
@@ -76,11 +76,11 @@
"type": "vm_file",
"path": [
"/home/user/Customers_New_7digit_Id.xlsx",
"/home/user/Customers_New_7digit_Id.csv"
"/home/user/Customers_New_7digit_Id-Sheet1.csv"
],
"dest": [
"Customers_New_7digit_Id.xlsx",
"Customers_New_7digit_Id.csv"
"Customers_New_7digit_Id-Sheet1.csv"
],
"multi": true
},
@@ -92,12 +92,18 @@
],
"dest": [
"Customers_New_7digit_Id_gold.xlsx",
"Customers_New_7digit_Id_gold.csv"
"Customers_New_7digit_Id_gold-Sheet1.csv"
],
"multi": true
},
"options": {
"as_shown": true
"rules": [
{
"type": "sheet_print",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
"snapshot": "libreoffice_calc",
"instruction": "Copy sheet1 and insert it before sheet2",
"instruction": "Copy Sheet1 and insert it before Sheet2 and rename it as Sheet1.bak",
"source": "https://www.libreofficehelp.com/add-insert-delete-copy-move-rename-a-worksheet-in-libreoffice-calc/",
"config": [
{
@@ -52,19 +52,21 @@
}
}
],
"func": "check_sheet_list",
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "/home/user/copy_sheet_insert.xlsx",
"dest": "copy_sheet_insert.xlsx"
},
"expected": {
"type": "rule",
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=15SjZNQtdt55SW6FIpi2JmWmVOpoZGjEB&export=download&authuser=0&confirm=t&uuid=182ac3dc-e0e9-4d30-b800-b842d5fcd665&at=APZUnTXVlvE4vv1-QO7HKqQOrikJ:1705850636082",
"dest": "copy_sheet_insert_gold.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_name",
"sheet_idx": 0,
"sheet_name": "Sheet1"
"type": "sheet_name"
},
{
"type": "sheet_data",
@@ -72,9 +74,9 @@
"sheet_idx1": 1
},
{
"type": "sheet_name",
"sheet_idx": 2,
"sheet_name": "Sheet2"
"type": "sheet_data",
"sheet_idx0": 1,
"sheet_idx1": "EI1"
}
]
}

View File

@@ -52,18 +52,31 @@
}
}
],
"func": "check_xlsx_zoom",
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "/home/user/Zoom_Out_Oversized_Cells.xlsx",
"dest": "Zoom_Out_Oversized_Cells.xlsx"
},
"expected": {
"type": "rule",
"rules": {
"relation": "lt",
"ref_value": 260
}
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1sX_roln3dNbXat0umlAIwZrW2eBmLkLE&export=download&authuser=0&confirm=t&uuid=a8f92992-86bd-43f4-92ce-9e4d99dca980&at=APZUnTUDPJVW_y6QeI9vlxlHAS0Q:1705851308643",
"dest": "Zoom_Out_Oversized_Cells_gold.xlsx"
},
"options": {
"rules": [
{
"type": "zoom",
"sheet_idx": 0,
"method": "lt",
"ref": 260
},
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -63,7 +63,7 @@
"command": [
"libreoffice",
"--convert-to",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
"--outdir",
"/home/user",
"/home/user/Represent_in_millions_billions.xlsx"
@@ -76,11 +76,11 @@
"type": "vm_file",
"path": [
"/home/user/Represent_in_millions_billions.xlsx",
"/home/user/Represent_in_millions_billions.csv"
"/home/user/Represent_in_millions_billions-Sheet1.csv"
],
"dest": [
"Represent_in_millions_billions.xlsx",
"Represent_in_millions_billions.csv"
"Represent_in_millions_billions-Sheet1.csv"
],
"multi": true
},
@@ -92,13 +92,19 @@
],
"dest": [
"Represent_in_millions_billions_gold.xlsx",
"Represent_in_millions_billions_gold.csv"
"Represent_in_millions_billions_gold-Sheet1.csv"
],
"multi": true
},
"options": {
"as_shown": true,
"ignore_case": true
"rules": [
{
"type": "sheet_print",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1",
"ignore_case": true
}
]
}
}
}

View File

@@ -63,8 +63,18 @@
"dest": "OrderId_Month_Chart.xlsx"
},
"options": {
"features": [
"sparkline"
"rules": [
{
"type": "sparkline",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
},
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
"snapshot": "libreoffice_calc",
"instruction": "Could you create two column charts of per-month costs for me using statistics in the form?",
"instruction": "Here are two tables recording the per-month costs in 2019 and 2020. I want to create two column bar charts reflecting per-month cost variations for each year from these data. Help me, Mr. Assistant!",
"source": "https://www.youtube.com/watch?v=bgO40-CjYNY",
"config": [
{
@@ -64,12 +64,21 @@
"dest": "Create_column_charts_using_statistics_gold.xlsx"
},
"options": {
"features": [
"chart"
],
"chart_props": [
"type",
"direction"
"rules": [
{
"type": "chart",
"sheet_idx0": 0,
"sheet_idx1": "EI0",
"chart_props": [
"type",
"direction"
]
},
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "37608790-6147-45d0-9f20-1137bb35703d",
"snapshot": "libreoffice_calc",
"instruction": "Help me fill the columns of First Name, Last Name and Rank",
"instruction": "The information are mixed in one field. Help me split them and fill in the columns of First Name, Last Name and Rank",
"source": "https://www.youtube.com/shorts/uzPo_CPCHH8",
"config": [
{
@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://101.43.24.67/s/FBip5fXoR4KEJaa",
"url": "https://drive.usercontent.google.com/download?id=1wDqap5cBfxnlqTNrZG61k_wDWTujl6AU&export=download&authuser=0&confirm=t&uuid=3c8203ea-c38f-411e-a7ea-bbb107548820&at=APZUnTVGfTjNFeZpqjITXs53uU3q:1705893319445",
"path": "/home/user/Employee_Roles_and_Ranks.xlsx"
}
]
@@ -27,16 +27,50 @@
"libreoffice calc"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Employee_Roles_and_Ranks.xlsx - LibreOffice Calc",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
]
}
}
],
"func": "compare_table",
"expected": {
"type": "cloud_file",
"path": "https://101.43.24.67/s/wr7B4GeotNNoeHD",
"path": "https://drive.usercontent.google.com/download?id=1dxpiUqP_CVvQp5tddxlwO3Cp1BqJ-ZDE&export=download&authuser=0&confirm=t&uuid=c79992d0-547f-4dc8-ae99-ff25327e04c6&at=APZUnTVARKZ-4KG_DHs1vAbbaaL_:1705893286722",
"dest": "Employee_Roles_and_Ranks_gold.xlsx"
},
"result": {
"type": "vm_file",
"path": "/home/user/Employee_Roles_and_Ranks.xlsx",
"dest": "Employee_Roles_and_Ranks.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
"snapshot": "libreoffice_calc",
"instruction": "Help me freeze the range A1:B5 on this sheet",
"instruction": "Help me freeze the range A1:B1 on this sheet to keep the headers always visible",
"source": "https://www.libreofficehelp.com/freeze-unfreeze-rows-columns-ranges-calc/",
"config": [
{
@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1ZhGLDKOden_oxzuN2xN9-jNQSHtCX6GE&export=download&authuser=0&confirm=t&uuid=2c097276-a610-4a9f-b6e4-5b54296c1555&at=APZUnTWc7zKPY_ykygn0mO1SAs4s:1703580957447",
"url": "https://drive.usercontent.google.com/download?id=1pmTn-32BIwXOu2NXgQG7-PCRPaNL5bYN&export=download&authuser=0&confirm=t&uuid=13f1a8fd-997e-4364-a182-ad8768eaac6a&at=APZUnTUOM1rcgxy87qxR1Rkdwdor:1705895562093",
"path": "/home/user/Freeze_row_column.xlsx"
}
]
@@ -52,17 +52,30 @@
}
}
],
"func": "check_xlsx_freeze",
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "/home/user/Freeze_row_column.xlsx",
"dest": "Freeze_row_column.xlsx"
},
"expected": {
"type": "rule",
"rules": {
"position": "C6"
}
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1oTsiIJFiAIFaTo320izMP7ml1RDCctgD&export=download&authuser=0&confirm=t&uuid=e3ec92c6-c2bf-4a7e-8e10-c8287350c9c6&at=APZUnTXEp-KV7bido8GcENfWIxME:1705895589811",
"dest": "Freeze_row_column_gold.xlsx"
},
"options": {
"rules": [
{
"type": "freeze",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
},
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
"snapshot": "libreoffice_calc",
"instruction": "Could you help me padding numbers to three decimals when used within formula?",
"instruction": "Here I want to use the numerical value from a cell in the text. I can set its number of decimal digits to 2 in the original value cell but don't know how to fix it in the text as well. Please help me to do this.",
"source": "https://superuser.com/questions/1081048/libreoffice-calc-how-to-pad-number-to-fixed-decimals-when-used-within-formula",
"config": [
{
@@ -31,7 +31,7 @@
{
"type": "activate_window",
"parameters": {
"window_name": "OrderId_Month_Chart.xlsx - LibreOffice Calc",
"window_name": "Padding_Decimals_In_Formular.xlsx - LibreOffice Calc",
"strict": true
}
},
@@ -50,18 +50,59 @@
"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"libreoffice",
"--convert-to",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
"--outdir",
"/home/user",
"/home/user/Padding_Decimals_In_Formular.xlsx"
]
}
}
],
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "/home/user/Padding_Decimals_In_Formular_gold.xlsx",
"dest": "Padding_Decimals_In_Formular.xlsx"
"path": [
"/home/user/Padding_Decimals_In_Formular.xlsx",
"/home/user/Padding_Decimals_In_Formular-Sheet1.csv"
],
"dest": [
"Padding_Decimals_In_Formular.xlsx",
"Padding_Decimals_In_Formular-Sheet1.csv"
],
"multi": true
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1DJTBic8_DREPhcau1GUkISfRm-L6xbBv&export=download&authuser=0&confirm=t&uuid=18e7d364-37fb-4e7f-baf1-9444ae5813ba&at=APZUnTXYuAaQ-Aa-2yqTn_2MXe0u:1704200380362",
"dest": "Padding_Decimals_In_Formular_gold.xlsx"
"path": [
"https://drive.usercontent.google.com/download?id=1DJTBic8_DREPhcau1GUkISfRm-L6xbBv&export=download&authuser=0&confirm=t&uuid=18e7d364-37fb-4e7f-baf1-9444ae5813ba&at=APZUnTXYuAaQ-Aa-2yqTn_2MXe0u:1704200380362",
"https://drive.usercontent.google.com/download?id=1Wy0Vf9Ivo6TLAv1nmkkkWTw0w14_hZIk&export=download&authuser=0&confirm=t&uuid=ad79fcff-1c23-4c11-aa47-a6a606f0a053&at=APZUnTUaukeh54t-7G1vWCms1AxS:1705896771858"
],
"dest": [
"Padding_Decimals_In_Formular_gold.xlsx",
"Padding_Decimals_In_Formular_gold-Sheet1.xlsx"
],
"multi": true
},
"options": {
"rules": [
{
"type": "sheet_print",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1"
}
]
}
}
}

View File

@@ -62,6 +62,15 @@
"type": "vm_file",
"path": "/home/user/Name_Order_Id_move_column.xlsx",
"dest": "Name_Order_Id_move_column.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -1,22 +0,0 @@
{
"id": "7b802dad-6e0f-4204-9815-d4e3f57627d8",
"snapshot": "libreoffice_calc",
"instruction": "I would like to sort this table based on cell color, placing all the rows marked with pink at the beginning, while keeping their order among themselves unchanged.",
"source": "https://www.youtube.com/shorts/Of-lzeP1usE",
"config": {
"download": [
[
"",
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
]
],
"open": [
"C:\\Users\\tianbaox\\Desktop\\Customer_Sort_by_cell_color.xlsx"
]
},
"trajectory": "trajectories/7b802dad-6e0f-4204-9815-d4e3f57627d8",
"related_apps": [
"libreoffice calc"
],
"evaluator": "evaluation_dir"
}

View File

@@ -62,6 +62,15 @@
"type": "vm_file",
"path": "/home/user/Order_Sales_Serial#.xlsx",
"dest": "Order_Sales_Serial#.xlsx"
}
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -63,7 +63,7 @@
"command": [
"libreoffice",
"--convert-to",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
"--outdir",
"/home/user",
"/home/user/Set_Decimal_Separator_Dot.xlsx"
@@ -76,11 +76,11 @@
"type": "vm_file",
"path": [
"/home/user/Set_Decimal_Separator_Dot.xlsx",
"/home/user/Set_Decimal_Separator_Dot.csv"
"/home/user/Set_Decimal_Separator_Dot-Sheet1.csv"
],
"dest": [
"Set_Decimal_Separator_Dot.xlsx",
"Set_Decimal_Separator_Dot.csv"
"Set_Decimal_Separator_Dot-Sheet1.csv"
],
"multi": true
},
@@ -92,12 +92,18 @@
],
"dest": [
"Set_Decimal_Separator_Dot_gold.xlsx",
"Set_Decimal_Separator_Dot_gold.csv"
"Set_Decimal_Separator_Dot_gold-Sheet1.csv"
],
"multi": true
},
"options": {
"as_shown": true
"rules": [
{
"type": "sheet_print",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1"
}
]
}
}
}

View File

@@ -62,6 +62,15 @@
"type": "vm_file",
"path": "/home/user/Movie_title_garbage_clean.xlsx",
"dest": "Movie_title_garbage_clean.xlsx"
}
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "aa3a8974-2e85-438b-b29e-a64df44deb4b",
"snapshot": "libreoffice_calc",
"instruction": "Could you help me resizing cells in the sheet so that they fit into just one page and export as pdf file?",
"instruction": "I'm working on a project and need to resize cells in a spreadsheet to fit onto one page and export to PDF for efficient presentation. Could you help me on this?",
"source": "https://www.quora.com/Libre-Office-Calc-How-do-I-resize-all-cells-in-a-sheet-to-make-them-fit-to-1-page-for-printing-and-exporting-as-PDF",
"config": [
{

View File

@@ -62,6 +62,15 @@
"type": "vm_file",
"path": "/home/user/Student_Grades_and_Remarks.xlsx",
"dest": "Student_Grades_and_Remarks.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -62,6 +62,15 @@
"type": "vm_file",
"path": "/home/user/Students_Class_Subject_Marks.xlsx",
"dest": "Students_Class_Subject_Marks.xlsx"
}
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -53,22 +53,32 @@
}
],
"func": "check_data_validations",
"expected": {
"type": "rule",
"options": {
"rules": [
{
"ranges": {
"method": "spreadsheet_range",
"ref": ["D2:D29", "D2:D1048576"]
},
"type": {
"method": "eq",
"ref": "list"
},
"formula1": {
"method": "str_set_eq",
"ref": ["Pass", "Fail", "Held"]
}
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
},
{
"type": "data_validation",
"sheet_idx": 0,
"dv_props": [
{
"ranges": {
"method": "spreadsheet_range",
"ref": ["D2:D29", "D2:D1048576"]
},
"type": {
"method": "eq",
"ref": "list"
},
"formula1": {
"method": "str_set_eq",
"ref": ["Pass", "Fail", "Held"]
}
}
]
}
]
},
@@ -76,6 +86,11 @@
"type": "vm_file",
"path": "/home/user/Order_Id_Mark_Pass_Fail.xlsx",
"dest": "Order_Id_Mark_Pass_Fail.xlsx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=125u7hc0LOcHXtJhOBd_Z5vt__7kwYOTQ&export=download&authuser=0&confirm=t&uuid=17e5da1b-fb1d-45d8-a9b1-6cd146ebaeee&at=APZUnTVqGyk6n5NPKzrq4KSOe871:1705898482101",
"dest": "Order_Id_Mark_Pass_Fail_gold.xlsx"
}
}
}

View File

@@ -62,6 +62,15 @@
"type": "vm_file",
"path": "/home/user/Quarterly_Product_Sales_by_Zone.xlsx",
"dest": "Quarterly_Product_Sales_by_Zone.xlsx"
}
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}