ver Dec25thv2

implemented functions to load sparklines from xlsx
2023-12-25 19:50:19 +08:00
parent 82e3353f65
commit ba77c276e6
5 changed files with 127 additions and 38 deletions
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -1 +1 @@
-from .table import compare_table
+from .table import compare_table, compare_with_sparklines
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -1,14 +1,74 @@
-def compare_table(expected, actual):
-    import pandas as pd
+import pandas as pd
+import zipfile
+import lxml.etree
+import lxml.cssselect
+from lxml.etree import _Element
+import xmltodict
+#import pylightxl
+
+from typing import Dict, List
+#from typing import Any
+
+def compare_table(actual, expected):
    df1 = pd.read_excel(expected)
    df2 = pd.read_excel(actual)

    # Compare the DataFrames
    return 1 if df1.equals(df2) else 0

+_xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
+                   , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
+                   ]
+_xlsx_ns_mapping = dict(_xlsx_namespaces)
+_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
+_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
+#print(_sparklines_selector.css)
+def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
+    """
+    Load the sparkline definitions found in xl/worksheets/sheet1.xml of an xlsx file.
+
+    Args:
+        xlsx_file (str): path to xlsx
+
+    Returns:
+        Dict[str, str]: sparkline definitions (cell reference -> source range) in form of
+          {
+            "F3": "Sheet1!C3:E3"
+          }
+    """
+
+    # read xlsx
+    with zipfile.ZipFile(xlsx_file, "r") as z_f:
+        with z_f.open("xl/worksheets/sheet1.xml") as f:
+            sheet1: _Element = lxml.etree.fromstring(f.read())
+            sparklines: List[_Element] = _sparklines_selector(sheet1)
+            
+    sparklines_dict: Dict[str, str] = {}
+    for sp_l in sparklines:
+        sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
+        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml
+                                                              , process_namespaces=True
+                                                              , namespaces=_xlsx_ns_imapping
+                                                              )
+        sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
+    return sparklines_dict
+
+def compare_with_sparklines(actual: str, expected: str) -> float:
+    df1 = pd.read_excel(actual)
+    df2 = pd.read_excel(expected)
+    normal_content_metric: bool = df1.equals(df2)
+
+    sp1 = _load_sparklines(actual)
+    sp2 = _load_sparklines(expected)
+    sparkline_metric: bool = sp1 == sp2
+
+    return float(normal_content_metric and sparkline_metric)

 if __name__ == '__main__':
-    path1 = ""
-    path2 = ""
-
-    print(compare_table(path1, path2))
+    #path1 = ""
+    #path2 = ""
+    #print(compare_table(path1, path2))
+    
+    path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
+    path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
+    print(compare_with_sparklines(path1, path2))
--- a/evaluation_examples/examples/2bd59342-0664-4ccb-ba87-79379096cc08.json
+++ b/evaluation_examples/examples/2bd59342-0664-4ccb-ba87-79379096cc08.json
@@ -3,20 +3,38 @@
  "snapshot": "libreoffice_calc",
  "instruction": "Make sparkline chart line by line",
  "source": "https://www.youtube.com/shorts/L3Z-F1QTQFY",
-  "config": {
-    "download": [
-      [
-        "",
-        "C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
-      ]
-    ],
-    "open": [
-      "C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://101.43.24.67/s/wrEyMi8HsmFjQrZ/download/OrderId_Month_Chart.xlsx",
+            "path": "/home/david/OrderId_Month_Chart.xlsx"
+          }
+        ]
+      }
+    },
+    {
+      "type": "open",
+      "parameters": {
+        "path": "/home/david/OrderId_Month_Chart.xlsx"
    ]
  },
  "trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08",
  "related_apps": [
    "libreoffice calc"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "expected": {
+      "type": "cloud_file",
+      "path": "https://101.43.24.67/s/t7pgJxNoAGFQWEM/download/OrderId_Month_Chart_gold.xlsx",
+      "dest": "OrderId_Month_Chart_gold.xlsx"
+    },
+    "result": {
+      "type": "vm_file",
+      "path": "/home/david/OrderId_Month_Chart.xlsx",
+      "dest": "OrderId_Month_Chart.xlsx"
+    }
+  }
 }
--- a/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json
+++ b/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json
@@ -3,32 +3,40 @@
  "snapshot": "libreoffice_calc",
  "instruction": "Help me fill the columns of First Name, Last Name and Rank",
  "source": "https://www.youtube.com/shorts/uzPo_CPCHH8",
-  "config": {
-    "download": [
-      [
-        "https://drive.usercontent.google.com/download?id=1wDqap5cBfxnlqTNrZG61k_wDWTujl6AU&export=download&authuser=0&confirm=t&uuid=fd183b89-76b7-4dc5-880e-1045ed769562&at=APZUnTWp9RMafMg0xohhBWazN3YD:1701785710674",
-        "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
-      ]
-    ],
-    "open": [
-      "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
-    ]
-  },
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://101.43.24.67/s/FBip5fXoR4KEJaa",
+            "path": "/home/david/Employee_Roles_and_Ranks.xlsx"
+          }
+        ]
+      }
+    },
+    {
+      "type": "open",
+      "parameters": {
+        "path": "/home/david/Employee_Roles_and_Ranks.xlsx"
+      }
+    }
+  ],
  "trajectory": "trajectories/37608790-6147-45d0-9f20-1137bb35703d",
  "related_apps": [
    "libreoffice calc"
  ],
  "evaluator": {
-    "func": "compare_table(expected, actual)",
-    "paths": {
-      "expected": {
-        "type": "cloud_file",
-        "path": "https://drive.usercontent.google.com/download?id=1dxpiUqP_CVvQp5tddxlwO3Cp1BqJ-ZDE&export=download&authuser=0&confirm=t&uuid=ccd204c7-07ce-4fdf-a5d4-a7e4f37b9ce6&at=APZUnTVBs7TgrVrDXpkiU8S7WbQo:1702360836747"
-      },
-      "actual": {
-        "type": "vm_file",
-        "path": "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
-      }
+    "func": "compare_table",
+    "expected": {
+      "type": "cloud_file",
+      "path": "https://101.43.24.67/s/wr7B4GeotNNoeHD",
+      "dest": "Employee_Roles_and_Ranks_gold.xlsx"
+    },
+    "result": {
+      "type": "vm_file",
+      "path": "/home/david/Employee_Roles_and_Ranks.xlsx",
+      "dest": "Employee_Roles_and_Ranks.xlsx"
    }
  }
 }
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,3 +15,6 @@ tqdm~=4.65.0
 pandas~=2.0.3
 flask~=3.0.0
 requests-toolbelt~=1.0.0
+lxml
+cssselect
+xmltodict