Merge remote-tracking branch 'origin/main'

2023-12-31 00:00:17 +08:00
parent c109c118d9 19b99a13e2
commit 12d478d198
13 changed files with 400 additions and 70 deletions
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -1,2 +1,3 @@
-from .table import compare_table, compare_with_sparklines, compare_with_charts
-from .table import check_sheet_list, check_xlsx_freeze
+from .table import compare_table
+from .table import check_sheet_list, check_xlsx_freeze, check_zoom
+from .docs import find_default_font, contains_page_break, compare_docx_files
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -0,0 +1,65 @@
+import xml.etree.ElementTree as ET
+    
+from docx import Document
+
+def find_default_font(expected, config_file_path):
+    """Return 1 if the LibreOffice Writer default font read from the registry file equals `expected`, else 0."""
+    default_font = None
+    try:
+        tree = ET.parse(config_file_path)
+        root = tree.getroot()
+        
+        # Define the XML namespace used in the file
+        namespace = {'oor': 'http://openoffice.org/2001/registry'}
+
+        # Search for the node containing the default font setting for LibreOffice Writer
+        for elem in root.findall('.//item[@oor:path="/org.openoffice.Office.Writer/DefaultFont"]', namespace):
+            for prop in elem.findall('.//prop[@oor:name="Standard"]', namespace):
+                for value in prop.findall('value', namespace):
+                    default_font = value.text
+    except Exception as e:
+        print(f"Error: {e}")
+    return 1 if default_font == expected else 0
+
+
+def contains_page_break(docx_file):
+    doc = Document(docx_file)
+    
+    namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
+    
+    for paragraph in doc.paragraphs:
+        for run in paragraph.runs:
+            br_elems = run.element.findall('.//w:br', namespaces)
+            for br in br_elems:
+                if br is not None and '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type' in br.attrib and br.attrib['{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'] == 'page':
+                    return 1
+    return 0
+
+def compare_docx_files(file1, file2):
+
+    doc1 = Document(file1)
+    doc2 = Document(file2)
+
+    doc1_paragraphs = [p.text for p in doc1.paragraphs]
+    doc2_paragraphs = [p.text for p in doc2.paragraphs]
+
+    if len(doc1_paragraphs) != len(doc2_paragraphs):
+        return 0
+
+    # Compare each paragraph
+    for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
+        if p1 != p2:
+            return 0
+
+    return 1
+
+# file1 = 'path/to/file1.docx'
+# file2 = 'path/to/file2.docx'
+
+# print(compare_docx_files(file1, file2))
+# Replace 'your_document.docx' with the path to your document
+# result = contains_page_break('your_document.docx')
+# print(result)
+    
+#config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
+#print(find_default_font("Ani", config_path))
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -1,56 +1,72 @@
 import pandas as pd
 import openpyxl
+from openpyxl import Workbook
 from openpyxl.worksheet.worksheet import Worksheet

 from .utils import load_charts, load_sparklines
+import operator

 from typing import Dict, List
-from typing import Any
+from typing import Any, Union
+from numbers import Number


-def compare_table(actual, expected):
-    df1 = pd.read_excel(expected)
-    df2 = pd.read_excel(actual)
-
-    # Compare the DataFrames
-    return 1 if df1.equals(df2) else 0
-
-
-def compare_with_sparklines(actual: str, expected: str) -> float:
-    df1 = pd.read_excel(actual)
-    df2 = pd.read_excel(expected)
-    normal_content_metric: bool = df1.equals(df2)
-    print("Normal Contents Metric: {:}".format(normal_content_metric))
-
-    sp1 = load_sparklines(actual)
-    sp2 = load_sparklines(expected)
-    sparkline_metric: bool = sp1 == sp2
-    print("Sparkline Metric: {:}".format(sparkline_metric))
-
-    return float(normal_content_metric and sparkline_metric)
-
-
-def compare_with_charts(actual: str, expected: str, **options) -> float:
+def compare_table(actual: str, expected: str, **options) -> float:
    """
    Args:
        actual (str): path to result xlsx
        expected (str): path to gold xlsx
-        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
-          giving the concerned chart properties
+        options (Dict[str, List[str]]): dict like
+          {
+            "features": list of str for other features, supports:
+                * sparkline
+                * chart
+                * number_format
+            "chart_props": list of str, giving the concerned chart properties
+          }
+
+    Return:
+        float: the score
    """

-    df1 = pd.read_excel(actual)
-    df2 = pd.read_excel(expected)
-    normal_content_metric: bool = df1.equals(df2)
-    print("Normal Contents Metric: {:}".format(normal_content_metric))
+    df1 = pd.read_excel(expected)
+    df2 = pd.read_excel(actual)
+    metric: bool = df1.equals(df2)
+    print("Normal Contents Metric: {:}".format(metric))

-    charts1 = load_charts(actual, **options)
-    charts2 = load_charts(expected, **options)
-    chart_metric: bool = charts1 == charts2
-    print("Chart Metric: {:}".format(chart_metric))
+    features: List[str] = options.get("features", [])
+    for ftr in features:
+        workbook1: Workbook = openpyxl.load_workbook(actual)
+        workbook2: Workbook = openpyxl.load_workbook(expected)

-    return float(normal_content_metric and chart_metric)
+        if ftr=="sparkline":
+            sp1 = load_sparklines(actual)
+            sp2 = load_sparklines(expected)
+            new_metric: bool = sp1 == sp2
+            print("Sparkline Metric: {:}".format(new_metric))
+        elif ftr=="chart":
+            charts1 = load_charts(workbook1, **options)
+            charts2 = load_charts(workbook2, **options)
+            new_metric: bool = charts1 == charts2
+            print("Chart Metric: {:}".format(new_metric))
+        elif ftr=="number_format":
+            number_formats1: List[str] = [ c.number_format.lower()\
+                                           for col in workbook1.active.iter_cols()\
+                                            for c in col\
+                                            if c.data_type=="n"
+                                         ]
+            number_formats2: List[str] = [ c.number_format.lower()\
+                                           for col in workbook2.active.iter_cols()\
+                                            for c in col\
+                                            if c.data_type=="n"
+                                         ]
+            new_metric: bool = number_formats1==number_formats2
+            print("Number Format Metric: {:}".format(new_metric))
+        else:
+            raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
+        metric = metric and new_metric

+    return float(metric)

 def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
    # workbook: Workbook = openpyxl.load_workbook(filename=result)
@@ -90,11 +106,17 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:

    return float(passes)

-
 def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    worksheet: Worksheet = openpyxl.load_workbook(filename=result).active
    return float(worksheet.freeze_panes == rules["position"])

+def check_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
+    worksheet = openpyxl.load_workbook(filename=result).active
+    zoom_scale: Number = worksheet.sheet_view.zoomScale or 100.
+    return float( getattr(operator, rules["relation"])( zoom_scale
+                                                      , rules["ref_value"]
+                                                      )
+                )

 if __name__ == '__main__':
    # path1 = ""
@@ -132,6 +154,38 @@ if __name__ == '__main__':
    # ]
    # print(check_sheet_list(path1, rule))

-    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
-    path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
-    print(compare_with_charts(path1, path2, chart_props=["type", "direction"]))
+    #path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
+    #path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
+    #print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"]))
+
+    #path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx"
+    #path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx"
+    #workbook1: Workbook = openpyxl.load_workbook(filename=path1)
+    #worksheet1: Worksheet = workbook1.active
+#
+    #import itertools
+    #for col, r in itertools.product( ['A', 'B', 'C']
+                                   #, range(1, 9)
+                                   #):
+        #position: str = "{:}{:d}".format(col, r)
+        #print(worksheet1[position])
+        #print(worksheet1[position].value)
+        #print(worksheet1[position].number_format)
+    #print(compare_table(path1, path2, features=["number_format"]))
+
+    path1 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells_gold.xlsx"
+    path2 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells.xlsx"
+    #workbook1: Workbook = openpyxl.load_workbook(filename=path1)
+    #worksheet1: Worksheet = workbook1.active
+    #print(worksheet1.sheet_view.zoomScale)
+    #print(type(worksheet1.sheet_view.zoomScale))
+#
+    #import os
+    #import os.path
+    #for wb in filter( lambda f: f.endswith(".xlsx")
+                    #, os.listdir("../../任务数据/LibreOffice Calc/")
+                    #):
+        #path = os.path.join("../../任务数据/LibreOffice Calc/", wb)
+        #print(wb, openpyxl.load_workbook(filename=path).active.sheet_view.zoomScale)
+    print(check_zoom(path1, {"relation": "lt", "ref_value": 100}))
+    print(check_zoom(path2, {"relation": "lt", "ref_value": 100}))
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -56,10 +56,10 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
 # type: "scatterChart" | "lineChart" | "barChart"
 # direction: "bar" (hori) | "col" (vert)
 # xtitle, ytitle, ztitle: str
-def load_charts(xlsx_file: str, **options) -> Dict[str, Any]:
+def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
    """
    Args:
-        xlsx_file (str): path to xlsx
+        xlsx_file (Workbook): the already-loaded Excel workbook whose active sheet is inspected
        options (Dict[str, List[str]]): dict like {"chart_props": list of str}
          giving the concerned chart properties

@@ -67,8 +67,8 @@ def load_charts(xlsx_file: str, **options) -> Dict[str, Any]:
        Dict[str, Any]: information of charts
    """

-    workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
-    worksheet: Worksheet = workbook.active
+    #workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
+    worksheet: Worksheet = xlsx_file.active
    charts: List[ChartBase] = worksheet._charts

    chart_set: Dict[str, Any] = {}