From 6f225b2a020b1e35fd4b1772df2d729d24b2c6c1 Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Fri, 29 Dec 2023 21:43:33 +0800
Subject: [PATCH] ver Dec29thv2

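Merge compare_with_sparklines, compare_with_charts and compare_with_formats
into a single compare_table metric for comparing a result xlsx with a golden
one. The extra checks are now selected through the "features" option
(sparkline, chart, number_format). load_charts now takes an already-loaded
Workbook instead of a file path, and the affected task configs are updated
to call compare_table.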
---
 desktop_env/evaluators/metrics/__init__.py    |   2 +-
 desktop_env/evaluators/metrics/table.py       | 111 ++++++++----------
 desktop_env/evaluators/metrics/utils.py       |   8 +-
 .../21df9241-f8d7-4509-b7f1-37e501a823f7.json |   7 +-
 .../2bd59342-0664-4ccb-ba87-79379096cc08.json |   8 +-
 .../347ef137-7eeb-4c80-a3bb-0951f26a8aff.json |   5 +-
 6 files changed, 72 insertions(+), 69 deletions(-)

diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 73090bc..d4aa8df 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -1,2 +1,2 @@
-from .table import compare_table, compare_with_sparklines, compare_with_charts
+from .table import compare_table
 from .table import check_sheet_list, check_xlsx_freeze
diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py
index 878db8a..7daa2fb 100644
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -9,71 +9,62 @@ from typing import Dict, List
 from typing import Any
 
 
-def compare_table(actual, expected):
-    df1 = pd.read_excel(expected)
-    df2 = pd.read_excel(actual)
-
-    # Compare the DataFrames
-    return 1 if df1.equals(df2) else 0
-
-
-def compare_with_sparklines(actual: str, expected: str) -> float:
-    df1 = pd.read_excel(actual)
-    df2 = pd.read_excel(expected)
-    normal_content_metric: bool = df1.equals(df2)
-    print("Normal Contents Metric: {:}".format(normal_content_metric))
-
-    sp1 = load_sparklines(actual)
-    sp2 = load_sparklines(expected)
-    sparkline_metric: bool = sp1 == sp2
-    print("Sparkline Metric: {:}".format(sparkline_metric))
-
-    return float(normal_content_metric and sparkline_metric)
-
-
-def compare_with_charts(actual: str, expected: str, **options) -> float:
+def compare_table(actual: str, expected: str, **options) -> float:
     """
     Args:
         actual (str): path to result xlsx
         expected (str): path to gold xlsx
-        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
-          giving the concerned chart properties
+        options (Dict[str, List[str]]): dict like
+          {
+            "features": list of str for other features, supports:
+                * sparkline
+                * chart
+                * number_format
+            "chart_props": list of str, giving the concerned chart properties
+          }
+
+    Return:
+        float: the score
     """
 
-    df1 = pd.read_excel(actual)
-    df2 = pd.read_excel(expected)
-    normal_content_metric: bool = df1.equals(df2)
-    print("Normal Contents Metric: {:}".format(normal_content_metric))
+    df1 = pd.read_excel(expected)
+    df2 = pd.read_excel(actual)
+    metric: bool = df1.equals(df2)
+    print("Normal Contents Metric: {:}".format(metric))
 
-    charts1 = load_charts(actual, **options)
-    charts2 = load_charts(expected, **options)
-    chart_metric: bool = charts1 == charts2
-    print("Chart Metric: {:}".format(chart_metric))
+    features: List[str] = options.get("features", [])
+    for ftr in features:
+        workbook1: Workbook = openpyxl.load_workbook(actual)
+        workbook2: Workbook = openpyxl.load_workbook(expected)
 
-    return float(normal_content_metric and chart_metric)
+        if ftr=="sparkline":
+            sp1 = load_sparklines(actual)
+            sp2 = load_sparklines(expected)
+            new_metric: bool = sp1 == sp2
+            print("Sparkline Metric: {:}".format(new_metric))
+        elif ftr=="chart":
+            charts1 = load_charts(workbook1, **options)
+            charts2 = load_charts(workbook2, **options)
+            new_metric: bool = charts1 == charts2
+            print("Chart Metric: {:}".format(new_metric))
+        elif ftr=="number_format":
+            number_formats1: List[str] = [ c.number_format.lower()\
+                                           for col in workbook1.active.iter_cols()\
+                                            for c in col\
+                                            if c.data_type=="n"
+                                         ]
+            number_formats2: List[str] = [ c.number_format.lower()\
+                                           for col in workbook2.active.iter_cols()\
+                                            for c in col\
+                                            if c.data_type=="n"
+                                         ]
+            new_metric: bool = number_formats1==number_formats2
+            print("Number Format Metric: {:}".format(new_metric))
+        else:
+            raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
+        metric = metric and new_metric
 
-def compare_with_formats(actual: str, expected: str) -> float:
-    df1 = pd.read_excel(actual)
-    df2 = pd.read_excel(expected)
-    normal_content_metric: bool = df1.equals(df2)
-    print("Normal Contents Metric: {:}".format(normal_content_metric))
-
-    workbook1: Workbook = openpyxl.load_workbook(actual)
-    number_formats1: List[str] = [ c.number_format.lower()\
-                                   for col in workbook1.active.iter_cols()\
-                                    for c in col\
-                                    if c.data_type=="n"
-                                 ]
-    workbook2: Workbook = openpyxl.load_workbook(expected)
-    number_formats2: List[str] = [ c.number_format.lower()\
-                                   for col in workbook2.active.iter_cols()\
-                                    for c in col\
-                                    if c.data_type=="n"
-                                 ]
-    number_format_metric: bool = number_formats1==number_formats2
-    print("Number Format Metric: {:}".format(number_format_metric))
-
-    return float(normal_content_metric & number_format_metric)
+    return float(metric)
 
 def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
     # workbook: Workbook = openpyxl.load_workbook(filename=result)
@@ -155,9 +146,9 @@ if __name__ == '__main__':
     # ]
     # print(check_sheet_list(path1, rule))
 
-    #path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
-    #path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
-    #print(compare_with_charts(path1, path2, chart_props=["type", "direction"]))
+    path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
+    path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
+    print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"]))
 
     path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx"
     path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx"
@@ -172,4 +163,4 @@ if __name__ == '__main__':
         #print(worksheet1[position])
         #print(worksheet1[position].value)
         #print(worksheet1[position].number_format)
-    print(compare_with_formats(path1, path2))
+    print(compare_table(path1, path2, features=["number_format"]))
diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py
index 65db158..b826d87 100644
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -56,10 +56,10 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
 # type: "scatterChart" | "lineChart" | "barChart"
 # direction: "bar" (hori) | "col" (vert)
 # xtitle, ytitle, ztitle: str
-def load_charts(xlsx_file: str, **options) -> Dict[str, Any]:
+def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
     """
     Args:
-        xlsx_file (str): path to xlsx
+        xlsx_file (Workbook): concerned excel book
         options (Dict[str, List[str]]): dict like {"chart_props": list of str}
           giving the concerned chart properties
 
@@ -67,8 +67,8 @@ def load_charts(xlsx_file: str, **options) -> Dict[str, Any]:
         Dict[str, Any]: information of charts
     """
 
-    workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
-    worksheet: Worksheet = workbook.active
+    #workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
+    worksheet: Worksheet = xlsx_file.active
     charts: List[ChartBase] = worksheet._charts
 
     chart_set: Dict[str, Any] = {}
diff --git a/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json b/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json
index b0f9577..4347fd6 100644
--- a/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json
+++ b/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json
@@ -27,7 +27,7 @@
 		"libreoffice_calc"
 	],
 	"evaluator": {
-		"func": "compare_with_formats",
+		"func": "compare_table",
 		"result": {
 			"type": "vm_file",
 			"path": "/home/david/Represent_in_millions_billions.xlsx",
@@ -37,6 +37,11 @@
 			"type": "cloud_file",
 			"path": "",
 			"dest": "Represent_in_millions_billions_gold.xlsx"
+		},
+		"options": {
+			"features": [
+				"number_format"
+			]
 		}
 	}
 }
diff --git a/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json b/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json
index aa483ba..4974fcf 100644
--- a/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json
+++ b/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json
@@ -26,7 +26,7 @@
     "libreoffice calc"
   ],
   "evaluator": {
-    "func": "compare_with_sparklines",
+    "func": "compare_table",
     "expected": {
       "type": "cloud_file",
       "path": "https://drive.usercontent.google.com/download?id=1KQJJLVPGtTL_7ArEWvwwbFbJSiA3cgSE&export=download&authuser=0&confirm=t&uuid=6b11c721-caad-439a-b369-4c13c7a485df&at=APZUnTV5-1isKrDKSHV9NeJ6TDeS:1703509054094",
@@ -36,6 +36,11 @@
       "type": "vm_file",
       "path": "/home/david/OrderId_Month_Chart.xlsx",
       "dest": "OrderId_Month_Chart.xlsx"
-    }
+    },
+    "options": {
+      "features": [
+        "sparkline"
+      ]
+    }
   }
 }
diff --git a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json
index fd7a7e7..329c4f0 100644
--- a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json
+++ b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json
@@ -27,7 +27,7 @@
     "libreoffice_calc"
   ],
 	"evaluator": {
-		"func": "compare_with_charts",
+		"func": "compare_table",
 		"result": {
 			"type": "vm_file",
 			"path": "/home/david/Create_column_charts_using_statistics.xlsx",
@@ -39,6 +39,9 @@
 			"dest": "Create_column_charts_using_statistics_gold.xlsx"
 		},
 		"options": {
+			"features": [
+				"chart"
+			],
 			"chart_props": [
 				"type",
 				"direction"