ver Jan18th

Completed all the previously incomplete tasks stored under libreoffice_calc; added the metric check_data_validations.
2024-01-18 17:54:53 +08:00
parent 19214f2107
commit a97c865c0c
8 changed files with 457 additions and 88 deletions
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -10,8 +10,10 @@ import openpyxl
 import pandas as pd
 from openpyxl import Workbook
 from openpyxl.worksheet.worksheet import Worksheet
+#from openpyxl.worksheet.cell_range import MultiCellRange
+from openpyxl.worksheet.datavalidation import DataValidation

-from .utils import load_charts, load_sparklines
+from .utils import load_charts, load_sparklines, _match_value_to_rule

 logger = logging.getLogger("desktopenv.metric.table")

@@ -27,7 +29,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
                * sparkline
                * chart
                * number_format
-            "chart_props": list of str, giving the converned chart properties
+            "chart_props": list of str, giving the concerned chart properties
            "as_shown": bool, TODO
          }

@@ -164,6 +166,57 @@ def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
                                                      )
                 )

+def check_data_validations(result: str, rules: List[Dict[str, Dict[str, Any]]]) -> float:
+    """
+    Checks every data validation of the active sheet against the given rules.
+
+    Args:
+        result (str): path to the concerned xlsx file
+        rules (List[Dict[str, Dict[str, Any]]]): list of dict like
+          {
+            <str as attribute>: {
+                "method": str
+                "ref": something
+            }
+          }
+          Each inner dict is passed to _match_value_to_rule together with
+          the value of the named attribute of a DataValidation object.
+          Available attributes:
+          * ranges
+          * type
+          * formula1, formula2
+          * operator
+          * allowBlank
+          * showDropDown
+          * showInputMessage, showErrorMessage
+          * error, errorTitle, errorStyle
+          * prompt, promptTitle
+          * imeMode
+
+    Returns:
+        float: 1.0 if every data validation of the active sheet satisfies all
+          entries of at least one rule (also when there are none); else 0.0
+    """
+
+    workbook: Workbook = openpyxl.load_workbook(result)
+    worksheet: Worksheet = workbook.active
+    data_validators: List[DataValidation] = worksheet.data_validations.dataValidation
+
+    total_metric = True  # stays True when the sheet has no data validation at all
+    for dat_vldt in data_validators:
+        metric = False  # becomes True once one rule is fully satisfied by this validation
+        for r in rules:
+            metric = metric or all( _match_value_to_rule( getattr(dat_vldt, attrbt)
+                                                        , mr
+                                                        )\
+                                for attrbt, mr in r.items()
+                                  )
+            if metric:
+                break
+        total_metric = total_metric and metric
+        if not total_metric:  # one unmatched validation already decides the result
+            break
+    return float(total_metric)

 if __name__ == '__main__':
    # path1 = ""
@@ -247,8 +300,31 @@ if __name__ == '__main__':
    # print(check_zoom(path1, {"relation": "lt", "ref_value": 100}))
    # print(check_zoom(path2, {"relation": "lt", "ref_value": 100}))

-    path1 = "../../任务数据/LibreOffice Calc/Customers_New_7digit_Id.xlsx"
-    path2 = "../../任务数据/LibreOffice Calc/Customers_New_7digit_Id_gold.xlsx"
+    #path1 = "../../任务数据/LibreOffice Calc/Customers_New_7digit_Id.xlsx"
+    #path2 = "../../任务数据/LibreOffice Calc/Customers_New_7digit_Id_gold.xlsx"
    #data_frame: pd.DataFrame = pd.read_excel(path1)
    #print(data_frame)
-    print(compare_table(path1, path2, as_shown=True))
+    #print(compare_table(path1, path2, as_shown=True))
+
+    #from openpyxl.worksheet.cell_range import MultiCellRange
+
+    path = "../../任务数据/LibreOffice Calc/Order_Id_Mark_Pass_Fail_gold.xlsx"
+    #worksheet: Worksheet = openpyxl.load_workbook(filename=path).active
+    ##print(worksheet.data_validations)
+    #print(type(worksheet.data_validations.dataValidation))
+    #for dat_vldt in worksheet.data_validations.dataValidation:
+        #print(dat_vldt.sqref)
+        #print(all(r in MultiCellRange("D2:D30 B1:B60") for r in dat_vldt.sqref))
+    print( check_data_validations( path, [ { "ranges": { "method": "spreadsheet_range"
+                                                       , "ref": ["D2:D29", "D2:D1048576"]
+                                                       }
+                                           , "type": { "method": "eq"
+                                                     , "ref": "list"
+                                                     }
+                                           , "formula1": { "method": "str_set_eq"
+                                                         , "ref": ["Pass", "Fail", "Held"]
+                                                         }
+                                           }
+                                         ]
+                                 )
+         )
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -6,6 +6,7 @@ from urllib.parse import urlparse, urlunparse
 import re
 import functools
 import operator
+import builtins

 import lxml.cssselect
 import lxml.etree
@@ -15,6 +16,7 @@ from lxml.etree import _Element
 from openpyxl import Workbook
 from openpyxl.chart._chart import ChartBase
 from openpyxl.worksheet.worksheet import Worksheet
+from openpyxl.worksheet.cell_range import MultiCellRange

 V = TypeVar("Value")

@@ -138,6 +140,8 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
 def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
    return all(k in item and item[k] == val for k, val in pattern.items())

+def _multicellrange_containsby(subset_candidate: MultiCellRange, superset_candidate: MultiCellRange) -> bool:
+    return all(r in superset_candidate for r in subset_candidate)  # True iff every range iterated from subset_candidate passes the `in` test of superset_candidate
 def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
    """
    Args:
@@ -165,6 +169,23 @@ def _match_value_to_rule(value: V, rule: Dict[str, Union[str, V]]) -> bool:
                         , "ge", "gt"
                         }:
        return getattr(operator, rule["method"])(value, rule["ref"])
+    if rule["method"] == "spreadsheet_range":
+        subset_limit = MultiCellRange(rule["ref"][0])
+        superset_limit = MultiCellRange(rule["ref"][1])
+        return _multicellrange_containsby(subset_limit, value)\
+           and _multicellrange_containsby(value, superset_limit)
+    if rule["method"].startswith("range."): # e.g., range.te [0, 2] -> 0 < x <= 2
+        left_et = rule["method"][6]
+        right_et = rule["method"][7]
+        return getattr(operator, "l" + left_et)(rule["ref"][0], value)\
+           and getattr(operator, "l" + right_et)(value, rule["ref"][1])
+    if rule["method"] in {"str_list_eq", "str_set_eq"}:
+        container_type_str: str = rule["method"][4:-3]
+        container_type = getattr(builtins, container_type_str)
+
+        value: container_type = container_type(value.strip("\"'").split(","))
+        ref: container_type = container_type(rule["ref"])
+        return value==ref
    raise NotImplementedError()

 def are_lists_equal(list1, list2, comparison_func):