ver Dec25thv2

implemented functions to load sparklines from xlsx
2023-12-25 19:50:19 +08:00
parent 82e3353f65
commit ba77c276e6
5 changed files with 127 additions and 38 deletions
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -1 +1 @@
-from .table import compare_table
+from .table import compare_table, compare_with_sparklines
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -1,14 +1,74 @@
-def compare_table(expected, actual):
+import pandas as pd
-    import pandas as pd
+import zipfile
 import lxml.etree
 import lxml.cssselect
 from lxml.etree import _Element
 import xmltodict
 #import pylightxl
 from typing import Dict, List
 #from typing import Any
 def compare_table(actual, expected):
     """Report whether two spreadsheets load into identical DataFrames.

     Each argument is handed to ``pandas.read_excel`` without extra options
     and the resulting frames are compared with ``DataFrame.equals``.

     Args:
         actual: spreadsheet produced by the run under evaluation
         expected: reference spreadsheet

     Returns:
         int: 1 when the two frames are equal, otherwise 0
     """
     # The reference file is loaded first, then the file under evaluation.
     reference_frame = pd.read_excel(expected)
     candidate_frame = pd.read_excel(actual)
     if reference_frame.equals(candidate_frame):
         return 1
     return 0
 # (prefix, URI) pairs for the XML namespaces referenced by the sparkline selector.
 _xlsx_namespaces = [
     ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
     ("xm", "http://schemas.microsoft.com/office/excel/2006/main"),
 ]
 # prefix -> URI, passed to the CSS selector below.
 _xlsx_ns_mapping = {prefix: uri for prefix, uri in _xlsx_namespaces}
 # URI -> prefix, the inverse lookup of the mapping above.
 _xlsx_ns_imapping = {uri: prefix for prefix, uri in _xlsx_namespaces}
 # Compiled selector matching every ``x14:sparkline`` element.
 _sparklines_selector = lxml.cssselect.CSSSelector(
     "x14|sparkline",
     namespaces=_xlsx_ns_mapping,
 )
 def _load_sparklines(xlsx_file: str) -> Dict[str, str]:
     """
     Collect the sparkline definitions found in the archive member
     ``xl/worksheets/sheet1.xml`` of an xlsx file.

     Args:
         xlsx_file (str): path to xlsx

     Returns:
         Dict[str, str]: for every ``x14:sparkline`` element, its ``xm:f``
           entry keyed by its ``xm:sqref`` entry, e.g.
           {
             "F3": "Sheet1!C3:E3"
           }
     """
     # An xlsx file is a zip archive; only the sheet1.xml member is inspected.
     with zipfile.ZipFile(xlsx_file, "r") as archive:
         with archive.open("xl/worksheets/sheet1.xml") as sheet_stream:
             worksheet_root: _Element = lxml.etree.fromstring(sheet_stream.read())
             sparkline_nodes: List[_Element] = _sparklines_selector(worksheet_root)

     definitions: Dict[str, str] = {}
     for node in sparkline_nodes:
         # Serialise the element and re-parse it with xmltodict so the
         # children can be looked up by their prefixed names.
         parsed: Dict[str, Dict[str, str]] = xmltodict.parse(
             lxml.etree.tostring(node, encoding="unicode"),
             process_namespaces=True,
             namespaces=_xlsx_ns_imapping,
         )
         entry = parsed["x14:sparkline"]
         definitions[entry["xm:sqref"]] = entry["xm:f"]
     return definitions
 def compare_with_sparklines(actual: str, expected: str) -> float:
     """Score two xlsx files on both cell content and sparkline definitions.

     The cell content is compared by loading each file with
     ``pandas.read_excel`` and calling ``DataFrame.equals``; the sparklines
     are compared as the dictionaries returned by ``_load_sparklines``.

     Args:
         actual (str): path to the xlsx produced by the run under evaluation
         expected (str): path to the reference xlsx

     Returns:
         float: 1.0 when both comparisons succeed, otherwise 0.0
     """
     actual_frame = pd.read_excel(actual)
     expected_frame = pd.read_excel(expected)
     same_cells: bool = actual_frame.equals(expected_frame)

     # Sparklines are always loaded, even when the cell content already differs.
     actual_sparklines = _load_sparklines(actual)
     expected_sparklines = _load_sparklines(expected)
     same_sparklines: bool = actual_sparklines == expected_sparklines

     if same_cells and same_sparklines:
         return 1.0
     return 0.0
 if __name__ == '__main__':
-    path1 = ""
+    #path1 = ""
-    path2 = ""
+    #path2 = ""
-
+    #print(compare_table(path1, path2))
-    print(compare_table(path1, path2))
+    
    path1 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart_gold.xlsx"
    path2 = "../../../../../任务数据/LibreOffice Calc/OrderId_Month_Chart.xlsx"
    print(compare_with_sparklines(path1, path2))
--- a/evaluation_examples/examples/2bd59342-0664-4ccb-ba87-79379096cc08.json
+++ b/evaluation_examples/examples/2bd59342-0664-4ccb-ba87-79379096cc08.json
@@ -3,20 +3,38 @@
  "snapshot": "libreoffice_calc",
  "instruction": "Make sparkline chart line by line",
  "source": "https://www.youtube.com/shorts/L3Z-F1QTQFY",
-  "config": {
+  "config": [
-    "download": [
+    {
-      [
+      "type": "download",
-        "",
+      "parameters": {
-        "C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
+        "files": [
-      ]
+          {
-    ],
+            "url": "https://101.43.24.67/s/wrEyMi8HsmFjQrZ/download/OrderId_Month_Chart.xlsx",
-    "open": [
+            "path": "/home/david/OrderId_Month_Chart.xlsx"
-      "C:\\Users\\tianbaox\\Desktop\\OrderId_Month_Chart.xlsx"
+          }
        ]
      }
    },
    {
      "type": "open",
      "parameters": {
        "path": "/home/david/OrderId_Month_Chart.xlsx"
    ]
  },
  "trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08",
  "related_apps": [
    "libreoffice calc"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
    "expected": {
      "type": "cloud_file",
      "path": "https://101.43.24.67/s/t7pgJxNoAGFQWEM/download/OrderId_Month_Chart_gold.xlsx",
      "dest": "OrderId_Month_Chart_gold.xlsx"
    },
    "result": {
      "type": "vm_file",
      "path": "/home/david/OrderId_Month_Chart.xlsx",
      "dest": "OrderId_Month_Chart.xlsx"
    }
  }
 }
--- a/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json
+++ b/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json
@@ -3,32 +3,40 @@
  "snapshot": "libreoffice_calc",
  "instruction": "Help me fill the columns of First Name, Last Name and Rank",
  "source": "https://www.youtube.com/shorts/uzPo_CPCHH8",
-  "config": {
+  "config": [
-    "download": [
+    {
-      [
+      "type": "download",
-        "https://drive.usercontent.google.com/download?id=1wDqap5cBfxnlqTNrZG61k_wDWTujl6AU&export=download&authuser=0&confirm=t&uuid=fd183b89-76b7-4dc5-880e-1045ed769562&at=APZUnTWp9RMafMg0xohhBWazN3YD:1701785710674",
+      "parameters": {
-        "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
+        "files": [
-      ]
+          {
-    ],
+            "url": "https://101.43.24.67/s/FBip5fXoR4KEJaa",
-    "open": [
+            "path": "/home/david/Employee_Roles_and_Ranks.xlsx"
-      "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
+          }
-    ]
+        ]
-  },
+      }
    },
    {
      "type": "open",
      "parameters": {
        "path": "/home/david/Employee_Roles_and_Ranks.xlsx"
      }
    }
  ],
  "trajectory": "trajectories/37608790-6147-45d0-9f20-1137bb35703d",
  "related_apps": [
    "libreoffice calc"
  ],
  "evaluator": {
-    "func": "compare_table(expected, actual)",
+    "func": "compare_table",
-    "paths": {
+    "expected": {
-      "expected": {
+      "type": "cloud_file",
-        "type": "cloud_file",
+      "path": "https://101.43.24.67/s/wr7B4GeotNNoeHD",
-        "path": "https://drive.usercontent.google.com/download?id=1dxpiUqP_CVvQp5tddxlwO3Cp1BqJ-ZDE&export=download&authuser=0&confirm=t&uuid=ccd204c7-07ce-4fdf-a5d4-a7e4f37b9ce6&at=APZUnTVBs7TgrVrDXpkiU8S7WbQo:1702360836747"
+      "dest": "Employee_Roles_and_Ranks_gold.xlsx"
-      },
+    },
-      "actual": {
+    "result": {
-        "type": "vm_file",
+      "type": "vm_file",
-        "path": "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
+      "path": "/home/david/Employee_Roles_and_Ranks.xlsx",
-      }
+      "dest": "Employee_Roles_and_Ranks.xlsx"
    }
  }
 }
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,3 +15,6 @@ tqdm~=4.65.0
 pandas~=2.0.3
 flask~=3.0.0
 requests-toolbelt~=1.0.0
 lxml
 cssselect
 xmltodict
`@@ -1 +1 @@`
	`from .table import compare_table`	`from .table import compare_table, compare_with_sparklines`