Merge branch 'zdy'

This commit is contained in:
David Chang
2024-01-23 23:59:18 +08:00
8 changed files with 491 additions and 27 deletions

View File

@@ -1,7 +1,7 @@
import logging
#import operator
from numbers import Number
from typing import Any, Union, cast, Callable
from typing import Any, Union, cast, Callable, Iterable
from typing import Dict, List, Tuple
import os.path
import itertools
@@ -13,9 +13,11 @@ from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
#from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.cell.cell import Cell
#from openpyxl.utils import coordinate_to_tuple
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles
from .utils import _match_value_to_rule
from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
logger = logging.getLogger("desktopenv.metric.table")
@@ -91,11 +93,11 @@ def compare_table(result: str, expected: str, **options) -> float:
return 0.
xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
pdworkbookr = pd.ExcelFile(xlworkbookr, engine="openpyxl")
pdworkbookr = pd.ExcelFile(result)
worksheetr_names: List[str] = pdworkbookr.sheet_names
xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
pdworkbooke = pd.ExcelFile(xlworkbooke, engine="openpyxl")
pdworkbooke = pd.ExcelFile(expected)
worksheete_names: List[str] = pdworkbooke.sheet_names
parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] =\
@@ -165,7 +167,7 @@ def compare_table(result: str, expected: str, **options) -> float:
# Compare Style (Also Conditional Formatiing) {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
# props: list of str indicating concerned styles
# props: list of str indicating concerned styles, see utils._read_cell_style
styles1: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
styles2: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
@@ -283,6 +285,31 @@ def compare_table(result: str, expected: str, **options) -> float:
logger.debug("Assertion: %s[cols] == %s[cols] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Check Row Properties #
elif r["type"] == "check_cell":
# Check Cell Properties {{{ #
# sheet_idx: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# coordinate: str, "E3"
# props: dict like {attribute: {"method": str, "ref": anything}}
# supported attributes: value & those supported by utils._read_cell_style
sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
#data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
cell: Cell = sheet[r["coordinate"]]
metric: bool = True
for prpt, rule in r["props"].items():
if prpt=="value":
val = read_cell_value(*parse_idx(r["sheet_idx"], result, expected), r["coordinate"])
else:
val = _read_cell_style(prpt, cell)
metric = metric and _match_value_to_rule(val, rule)
logger.debug( "Assertion: %s[%s] :%s - %s"
, r["sheet_idx"], r["coordinate"]
, repr(r["props"]), metric
)
# }}} Check Cell Properties #
else:
raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))
@@ -293,6 +320,24 @@ def compare_table(result: str, expected: str, **options) -> float:
return float(passes)
# }}} function compare_table #
def compare_csv(result: str, expected: str, **options) -> float:
    """Compare two CSV files line by line.

    Args:
        result (str): path to the CSV file produced by the task, or None if
          no file could be obtained
        expected (str): path to the gold CSV file
        options (Dict[str, Any]): optional switches
          - strict (bool): defaults to True; if False, leading and trailing
            whitespace of every line is stripped before the comparison
          - ignore_case (bool): defaults to False; if True, every line is
            lower-cased before the comparison

    Returns:
        float: 1. if both files contain the same sequence of lines, 0.
          otherwise (and 0. if `result` is None)
    """

    if result is None:
        return 0.

    # Decode explicitly as UTF-8 so that the verdict does not depend on the
    # locale of the machine running the evaluation.
    with open(result, encoding="utf-8") as f:
        result_lines: Iterable[str] = f.read().splitlines()
    with open(expected, encoding="utf-8") as f:
        expected_lines: Iterable[str] = f.read().splitlines()

    if not options.get("strict", True):
        result_lines = map(str.strip, result_lines)
        expected_lines = map(str.strip, expected_lines)
    if options.get("ignore_case", False):
        result_lines = map(str.lower, result_lines)
        expected_lines = map(str.lower, expected_lines)

    # The maps above are lazy; materialise both sides before comparing.
    metric: bool = list(result_lines) == list(expected_lines)
    return float(metric)
if __name__ == '__main__':
import datetime
import sys
@@ -326,16 +371,15 @@ if __name__ == '__main__':
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
rules = [ { "type": "sheet_data"
, "sheet_idx0": 0
, "sheet_idx1": "EI0"
}
, { "type": "style"
, "sheet_idx0": 0
, "sheet_idx1": "EI0"
, "props": ["bgcolor"]
path1 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number.xlsx"
path2 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number_gold.xlsx"
rules = [ { "type": "check_cell"
, "sheet_idx": 0
, "coordinate": "E3"
, "props": { "value": { "method": "approx:0.001"
, "ref": 191.6667
}
}
}
]
print( compare_table( path1, path2

View File

@@ -34,6 +34,7 @@ _xlsx_namespaces = [ ("oo", "http://schemas.openxmlformats.org/spreadsheetml/200
]
_xlsx_ns_mapping = dict(_xlsx_namespaces)
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
@@ -154,6 +155,48 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
return chart_set
# }}} function load_charts #
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
    #  read_cell_value {{{ # 
    """Read the stored value of one cell directly from the xlsx XML.

    Args:
        xlsx_file (str): path to the xlsx archive
        sheet_name (str): name of the sheet as declared in xl/workbook.xml
        coordinate (str): cell reference such as "E3"

    Returns:
        Any: float for numeric cells (including cells without a `t`
          attribute, whose type defaults to numeric), str for shared-string
          ("s") and formula-string ("str") cells, and None if the cell is
          absent, holds no <v> value, or has any other type

    Raises:
        KeyError: if `sheet_name` is not declared in the workbook or the
          worksheet part is missing from the archive
        IndexError: if a shared-string cell points outside the string table
    """

    with zipfile.ZipFile(xlsx_file, "r") as z_f:
        # The shared-string table is read on a best-effort basis. Start from
        # an empty table so the name is always bound, even if the part is
        # missing or unreadable.
        shared_strs: List[str] = []
        try:
            with z_f.open("xl/sharedStrings.xml") as f:
                shared_str_xml: _Element = lxml.etree.fromstring(f.read())
        except Exception:
            logger.debug("Read shared strings error: %s", xlsx_file)
        else:
            # Cells refer to <si> items by position, so exactly one entry is
            # produced per <si>. A rich-text item keeps its text in several
            # <r><t> runs, which are concatenated here.
            for str_item in shared_str_xml.xpath("oo:si", namespaces=_xlsx_ns_mapping):
                text_runs: List[_Element] = str_item.xpath( "oo:t | oo:r/oo:t"
                                                          , namespaces=_xlsx_ns_mapping
                                                          )
                shared_strs.append("".join(run.text or "" for run in text_runs))

        with z_f.open("xl/workbook.xml") as f:
            workbook_database: _Element = lxml.etree.fromstring(f.read())
            sheets: List[_Element] = _sheet_name_selector(workbook_database)
            sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}

        # NOTE(review): the worksheet part name is derived from sheetId; this
        # presumably matches the files handled here, but the authoritative
        # mapping is the workbook relationships part - confirm.
        with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
            sheet: _Element = lxml.etree.fromstring(f.read())
            cells: List[_Element] =\
                    lxml.cssselect.CSSSelector( 'oo|row>oo|c[r="{:}"]'.format(coordinate)
                                              , namespaces=_xlsx_ns_mapping
                                              )(sheet)
            if len(cells)==0:
                return None
            cell_element: _Element = cells[0]

    cell: Dict[str, Any] = xmltodict.parse( lxml.etree.tostring(cell_element, encoding="unicode")
                                          , process_namespaces=True
                                          , namespaces=_xlsx_ns_imapping
                                          )
    logger.debug("%s.%s[%s]: %s", xlsx_file, sheet_name, coordinate, repr(cell))

    content: Dict[str, Any] = cell["c"]
    raw_value = content.get("v")
    if raw_value is None:
        # e.g. a styled but empty cell
        return None

    # A <c> element without a "t" attribute is a numeric cell.
    cell_type: str = content.get("@t", "n")
    if cell_type == "n":
        return float(raw_value)
    if cell_type == "s":
        return shared_strs[int(raw_value)]
    if cell_type == "str":
        return raw_value
    return None
    #  }}} read_cell_value # 
# Supported Styles:
# number_format
# font_name - str
@@ -311,6 +354,15 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
, "ge", "gt"
}:
return getattr(operator, rule["method"])(value, rule["ref"])
if rule["method"].startswith("approx"):
threshold: float = float(rule["method"].split(":")[1])
logger.debug("Approx: TH%f, REF%f, VAL%s", threshold, rule["ref"], repr(value))
try:
value = float(value)
except (ValueError, TypeError):
return False
else:
return abs(value-rule["ref"])<=threshold
if rule["method"] == "spreadsheet_range":
subset_limit = MultiCellRange(rule["ref"][0])
superset_limit = MultiCellRange(rule["ref"][1])

View File

@@ -0,0 +1,77 @@
{
"id": "357ef137-7eeb-4c80-a3bb-0951f26a8aff",
"snapshot": "libreoffice_calc",
"instruction": "I have calculated the total work hours from the everyday hours. And I have an hourly rate. Now I want to multiply the total hours with the hourly rate to get a total earned amount. However, I can't get a correct answer by directly multiplying the two cells. Here the \"total hours\" is a time value and \"hourly rate\" is just a number. How can I get the correct product of them?",
"source": "https://www.reddit.com/r/excel/comments/17zny8u/calculating_total_amount_earned_from_total_hours/",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1sOEf4lBmj7CrZytbPfqUhDGnIqhAa6WL&export=download&authuser=0&confirm=t&uuid=65393df5-96b7-45e9-bdd8-be51c1794e72&at=APZUnTUTe0CZiQEw-iZSZOA4Hcrl:1706016332794",
"path": "/home/user/Multiply_Time_Number.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Multiply_Time_Number.xlsx"
}
}
],
"trajectory": "trajectories/357ef137-7eeb-4c80-a3bb-0951f26a8aff",
"related_apps": [
"libreoffice calc"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Multiply_Time_Number.xlsx - LibreOffice Calc",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
]
}
}
],
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "/home/user/Multiply_Time_Number.xlsx",
"dest": "Multiply_Time_Number.xlsx"
},
"options": {
"rules": [
{
"type": "check_cell",
"sheet_idx": 0,
"coordinate": "E3",
"props": {
"value": {
"method": "approx:0.001",
"ref": 191.6667
}
}
}
]
}
}
}

View File

@@ -0,0 +1,42 @@
{
"id": "3aaa4e37-dc91-482e-99af-132a612d40f3",
"snapshot": "libreoffice_calc",
"instruction": "Could you help me to export the current sheet to a csv file? Export the contents just as they are shown on the screen. Just keep the other options untouched. A default csv format is ok. The csv should share the file name with the original xlsx.",
"source": "https://www.quora.com/How-can-you-import-export-CSV-files-with-LibreOffice-Calc-or-OpenOffice",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=17sNGNFpZtmwuz74Pid2WwWL_rgTkgTg0&export=download&authuser=0&confirm=t&uuid=18d3601d-e329-4525-bd11-633c678601d6&at=APZUnTVY9kvNoMeETP6HVRynDHqq:1706012082827",
"path": "/home/user/Export_Calc_to_CSV.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Export_Calc_to_CSV.xlsx"
}
}
],
"trajectory": "trajectories/3aaa4e37-dc91-482e-99af-132a612d40f3",
"related_apps": [
"libreoffice_calc"
],
"evaluator": {
"func": "compare_csv",
"result": {
"type": "vm_file",
"path": "/home/user/Export_Calc_to_CSV.csv",
"dest": "Export_Calc_to_CSV.csv"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1cjyinycSAxi5oBb-dcpC85CMX9i-ggg-&export=download&authuser=0&confirm=t&uuid=b18a6fbd-8318-49ee-a62b-af22523df9e7&at=APZUnTV6dhR5_TcDg002RQ5CHmoC:1706012321261",
"dest": "Export_Calc_to_CSV_gold.csv"
}
}
}

View File

@@ -0,0 +1,76 @@
{
"id": "51b11269-2ca8-4b2a-9163-f21758420e78",
"snapshot": "libreoffice_calc",
"instruction": "Could you help me to sort the records according to the amounts in ascending order?",
"source": "https://www.reddit.com/r/LibreOfficeCalc/comments/186pcc6/how_to_arrange_numbers_in_a_column_from_minimum/",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1lMPR8itmrjesLPh8dqxrssNNnlQUSF8Y&export=download&authuser=0&confirm=t&uuid=f79c8ece-f42d-42dc-b320-9ee4d9a6b5d4&at=APZUnTUQAGyFOhO9mMS4r_Ap-cFy:1706001993533",
"path": "/home/user/Arrang_Value_min_to_max.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Arrang_Value_min_to_max.xlsx"
}
}
],
"trajectory": "trajectories/51b11269-2ca8-4b2a-9163-f21758420e78",
"related_apps": [
"libreoffice calc"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Arrang_Value_min_to_max.xlsx - LibreOffice Calc",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
]
}
}
],
"func": "compare_table",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1gZq47ls5eD-YpRH8MIeEYOpYwAAUR_Ua&export=download&authuser=0&confirm=t&uuid=085c84a0-eb3f-4fe8-9fa0-6d2495985cc0&at=APZUnTVtt_9I5I8emixgM2hbnjMh:1706002085970",
"dest": "Arrang_Value_min_to_max_gold.xlsx"
},
"result": {
"type": "vm_file",
"path": "/home/user/Arrang_Value_min_to_max.xlsx",
"dest": "Arrang_Value_min_to_max.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -0,0 +1,109 @@
{
"id": "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
"snapshot": "libreoffice_calc",
"instruction": "Help me format column \"spent\" to keep two decimal places.",
"source": "https://www.youtube.com/watch?v=nl-nXjJurhQ",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1FG_e6DhoYybUMtkhAOJrJWCctIcP1E1I&export=download&authuser=0&confirm=t&uuid=2ee528f2-b86d-415d-a93b-350a1fa3820d&at=APZUnTXTQiGRl6DlkMFts81FT-En:1706005475837",
"path": "/home/user/Keep_Two_decimal_points.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Keep_Two_decimal_points.xlsx"
}
}
],
"trajectory": "trajectories/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
"related_apps": [
"libreoffice calc"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Keep_Two_decimal_points.xlsx - LibreOffice Calc",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"libreoffice",
"--convert-to",
"csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1",
"--outdir",
"/home/user",
"/home/user/Keep_Two_decimal_points.xlsx"
]
}
}
],
"func": "compare_table",
"expected": {
"type": "cloud_file",
"path": [
"https://drive.usercontent.google.com/download?id=150XJSpqU-HuYSjPi7_LgrlQFUjKhKCU8&export=download&authuser=0&confirm=t&uuid=9b9fd955-ee06-4c55-a240-290dcd658d84&at=APZUnTW87ODEUqUzDpP2tah8tQCo:1706005816811",
"https://drive.usercontent.google.com/download?id=1-cxAMcFdKRUl0bFHr1nwI1XudvGV__s1&export=download&authuser=0&confirm=t&uuid=2aa40a50-3081-4d88-8d1d-e3adcfa2050e&at=APZUnTXeohokhdZeJzu3XT-ZyUwB:1706005770752"
],
"dest": [
"Keep_Two_decimal_points_gold.xlsx",
"Keep_Two_decimal_points_gold-Sheet1.csv"
],
"multi": true
},
"result": {
"type": "vm_file",
"path": [
"/home/user/Keep_Two_decimal_points.xlsx",
"/home/user/Keep_Two_decimal_points-Sheet1.csv"
],
"dest": [
"Keep_Two_decimal_points.xlsx",
"Keep_Two_decimal_points-Sheet1.csv"
],
"multi": true
},
"options": {
"rules": [
{
"type": "sheet_print",
"sheet_idx0": "RNSheet1",
"sheet_idx1": "ENSheet1"
}
]
}
}
}

View File

@@ -0,0 +1,76 @@
{
"id": "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
"snapshot": "libreoffice_calc",
"instruction": "I have a lookup table for the officers of each branch. Here is another table that I need to fill in with the officer names according to the headoffice (i.e., the branch name). Please help me to complete this.",
"source": "https://medium.com/@divyangichaudhari17/how-to-use-vlookup-and-hlookup-in-libre-calc-3370698bb3ff",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1MChpDlA3nkS8l8rCG9AU6vYz8kPv7eXS&export=download&authuser=0&confirm=t&uuid=2031ca44-b8f1-486e-a6f8-ee87518dc842&at=APZUnTVWaM1Pz6ZZ5hertMLxjb_v:1706003657696",
"path": "/home/user/VLOOKUP_Fill_the_form.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/VLOOKUP_Fill_the_form.xlsx"
}
}
],
"trajectory": "trajectories/7e429b8d-a3f0-4ed0-9b58-08957d00b127",
"related_apps": [
"libreoffice calc"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "VLOOKUP_Fill_the_form.xlsx - LibreOffice Calc",
"strict": true
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; pyautogui.press([\"ctrl\", \"s\"]);"
]
}
}
],
"func": "compare_table",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1vjZj7KgfvEnAcXigUWGuIt1Xy6E2W_B5&export=download&authuser=0&confirm=t&uuid=f58e573f-ba94-4904-a5e4-4a7a4b095e6e&at=APZUnTVX7yytFQJQ2rdQ3o5zKVhd:1706003731041",
"dest": "VLOOKUP_Fill_the_form_gold.xlsx"
},
"result": {
"type": "vm_file",
"path": "/home/user/VLOOKUP_Fill_the_form.xlsx",
"dest": "VLOOKUP_Fill_the_form.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": 0,
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -1,12 +0,0 @@
{
"id": "b188fe10-ae67-4db8-a154-26a0b8ff8f1e",
"snapshot": "thunderbird",
"instruction": "Help me restore the zebra stripes background in message list panel.",
"source": "https://www.reddit.com/r/Thunderbird/comments/17vv2os/restore_readability_in_message_list_pane/",
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"thunderbird"
],
"evaluator": "evaluation_dir"
}