Merge remote-tracking branch 'origin/main'
This commit is contained in:
@@ -1,2 +1,3 @@
|
||||
from .table import compare_table, compare_with_sparklines, compare_with_charts
|
||||
from .table import check_sheet_list, check_xlsx_freeze
|
||||
from .table import compare_table
|
||||
from .table import check_sheet_list, check_xlsx_freeze, check_zoom
|
||||
from .docs import find_default_font, contains_page_break, compare_docx_files
|
||||
|
||||
65
desktop_env/evaluators/metrics/docs.py
Normal file
65
desktop_env/evaluators/metrics/docs.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from docx import Document
|
||||
|
||||
def find_default_font(expected, config_file_path):
    """Check whether LibreOffice Writer's configured default font is ``expected``.

    Parses the XML registry file at ``config_file_path`` and looks for
    ``item`` elements whose ``oor:path`` is
    ``/org.openoffice.Office.Writer/DefaultFont``, then for their ``prop``
    children named ``Standard`` and reads the text of each ``value`` child.
    If several values match, the last one found is used.

    Args:
        expected: font name the configuration is expected to contain.
        config_file_path: path to the registry XML file
            (e.g. ``registrymodifications.xcu``).

    Returns:
        1 if the font found equals ``expected``, otherwise 0. A missing,
        unreadable or malformed file also yields 0 (the error is printed).
    """
    # Namespace bound to the `oor:` prefix used by the attribute predicates.
    namespace = {'oor': 'http://openoffice.org/2001/registry'}

    if config_file_path is None:
        # No file to inspect: score 0, as the metric always has.
        print("Error: no configuration file path given")
        return 0

    # Only the file access/parse is guarded, so that genuine programming
    # errors (e.g. a broken XPath expression) are not silently swallowed.
    try:
        root = ET.parse(config_file_path).getroot()
    except (OSError, ET.ParseError) as e:
        print(f"Error: {e}")
        return 0

    default_font = None
    # Search for the node containing the default font setting for Writer.
    for elem in root.findall('.//item[@oor:path="/org.openoffice.Office.Writer/DefaultFont"]', namespace):
        for prop in elem.findall('.//prop[@oor:name="Standard"]', namespace):
            for value in prop.findall('value', namespace):
                default_font = value.text

    return 1 if default_font == expected else 0
|
||||
|
||||
|
||||
def contains_page_break(docx_file):
    """Report whether a .docx document contains an explicit page break.

    Opens ``docx_file`` with ``Document`` and walks every run of every
    paragraph in ``doc.paragraphs``, looking for ``w:br`` elements whose
    ``w:type`` attribute is ``page``.

    Args:
        docx_file: the document to open (passed straight to ``Document``).

    Returns:
        1 as soon as one page-break element is found, otherwise 0.
    """
    ns_map = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
    # Fully qualified name of the `w:type` attribute as stored on the element.
    type_attr = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'

    document = Document(docx_file)
    for para in document.paragraphs:
        for text_run in para.runs:
            for line_break in text_run.element.findall('.//w:br', ns_map):
                # A missing attribute yields None, which never equals 'page'.
                if line_break.attrib.get(type_attr) == 'page':
                    return 1
    return 0
|
||||
|
||||
def compare_docx_files(file1, file2):
    """Compare two .docx documents by the text of their paragraphs.

    Both files are opened with ``Document``; the ``text`` of every entry in
    ``paragraphs`` is collected in order and the two sequences are compared.

    Args:
        file1: first document to open.
        file2: second document to open.

    Returns:
        1 if both documents have the same number of paragraphs and every
        pair of corresponding paragraph texts is equal, otherwise 0.
    """
    first_doc = Document(file1)
    second_doc = Document(file2)

    first_texts = [para.text for para in first_doc.paragraphs]
    second_texts = [para.text for para in second_doc.paragraphs]

    # List equality covers both the length check and the pairwise comparison.
    return 1 if first_texts == second_texts else 0
|
||||
|
||||
# file1 = 'path/to/file1.docx'
|
||||
# file2 = 'path/to/file2.docx'
|
||||
|
||||
# print(compare_docx_files(file1, file2))
|
||||
# Replace 'your_document.docx' with the path to your document
|
||||
# result = contains_page_break('your_document.docx')
|
||||
# print(result)
|
||||
|
||||
#config_path = "/home/[username]/.config/libreoffice/4/user/registrymodifications.xcu"
|
||||
#print(find_default_font("Ani", config_path))
|
||||
@@ -1,56 +1,72 @@
|
||||
import pandas as pd
|
||||
import openpyxl
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.worksheet.worksheet import Worksheet
|
||||
|
||||
from .utils import load_charts, load_sparklines
|
||||
import operator
|
||||
|
||||
from typing import Dict, List
|
||||
from typing import Any
|
||||
from typing import Any, Union
|
||||
from numbers import Number
|
||||
|
||||
|
||||
def compare_table(actual, expected):
|
||||
df1 = pd.read_excel(expected)
|
||||
df2 = pd.read_excel(actual)
|
||||
|
||||
# Compare the DataFrames
|
||||
return 1 if df1.equals(df2) else 0
|
||||
|
||||
|
||||
def compare_with_sparklines(actual: str, expected: str) -> float:
|
||||
df1 = pd.read_excel(actual)
|
||||
df2 = pd.read_excel(expected)
|
||||
normal_content_metric: bool = df1.equals(df2)
|
||||
print("Normal Contents Metric: {:}".format(normal_content_metric))
|
||||
|
||||
sp1 = load_sparklines(actual)
|
||||
sp2 = load_sparklines(expected)
|
||||
sparkline_metric: bool = sp1 == sp2
|
||||
print("Sparkline Metric: {:}".format(sparkline_metric))
|
||||
|
||||
return float(normal_content_metric and sparkline_metric)
|
||||
|
||||
|
||||
def compare_with_charts(actual: str, expected: str, **options) -> float:
|
||||
def compare_table(actual: str, expected: str, **options) -> float:
|
||||
"""
|
||||
Args:
|
||||
actual (str): path to result xlsx
|
||||
expected (str): path to gold xlsx
|
||||
options (Dict[str, List[str]]): dict like {"chart_props": list of str}
|
||||
giving the concerned chart properties
|
||||
options (Dict[str, List[str]]): dict like
|
||||
{
|
||||
"features": list of str for other features, supports:
|
||||
* sparkline
|
||||
* chart
|
||||
* number_format
|
||||
"chart_props": list of str, giving the concerned chart properties
|
||||
}
|
||||
|
||||
Return:
|
||||
float: the score
|
||||
"""
|
||||
|
||||
df1 = pd.read_excel(actual)
|
||||
df2 = pd.read_excel(expected)
|
||||
normal_content_metric: bool = df1.equals(df2)
|
||||
print("Normal Contents Metric: {:}".format(normal_content_metric))
|
||||
df1 = pd.read_excel(expected)
|
||||
df2 = pd.read_excel(actual)
|
||||
metric: bool = df1.equals(df2)
|
||||
print("Normal Contents Metric: {:}".format(metric))
|
||||
|
||||
charts1 = load_charts(actual, **options)
|
||||
charts2 = load_charts(expected, **options)
|
||||
chart_metric: bool = charts1 == charts2
|
||||
print("Chart Metric: {:}".format(chart_metric))
|
||||
features: List[str] = options.get("features", [])
|
||||
for ftr in features:
|
||||
workbook1: Workbook = openpyxl.load_workbook(actual)
|
||||
workbook2: Workbook = openpyxl.load_workbook(expected)
|
||||
|
||||
return float(normal_content_metric and chart_metric)
|
||||
if ftr=="sparkline":
|
||||
sp1 = load_sparklines(actual)
|
||||
sp2 = load_sparklines(expected)
|
||||
new_metric: bool = sp1 == sp2
|
||||
print("Sparkline Metric: {:}".format(new_metric))
|
||||
elif ftr=="chart":
|
||||
charts1 = load_charts(workbook1, **options)
|
||||
charts2 = load_charts(workbook2, **options)
|
||||
new_metric: bool = charts1 == charts2
|
||||
print("Chart Metric: {:}".format(new_metric))
|
||||
elif ftr=="number_format":
|
||||
number_formats1: List[str] = [ c.number_format.lower()\
|
||||
for col in workbook1.active.iter_cols()\
|
||||
for c in col\
|
||||
if c.data_type=="n"
|
||||
]
|
||||
number_formats2: List[str] = [ c.number_format.lower()\
|
||||
for col in workbook2.active.iter_cols()\
|
||||
for c in col\
|
||||
if c.data_type=="n"
|
||||
]
|
||||
new_metric: bool = number_formats1==number_formats2
|
||||
print("Number Format Metric: {:}".format(new_metric))
|
||||
else:
|
||||
raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
|
||||
metric = metric and new_metric
|
||||
|
||||
return float(metric)
|
||||
|
||||
def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
|
||||
# workbook: Workbook = openpyxl.load_workbook(filename=result)
|
||||
@@ -90,11 +106,17 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
|
||||
|
||||
return float(passes)
|
||||
|
||||
|
||||
def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
    """Check the freeze-pane position of the active sheet of a workbook.

    Args:
        result (str): path to the xlsx file to load
        rules (Dict[str, str]): dict providing the expected value under the
          key "position"

    Return:
        float: 1.0 if the active sheet's `freeze_panes` equals
          rules["position"], else 0.0
    """

    workbook = openpyxl.load_workbook(filename=result)
    active_sheet: Worksheet = workbook.active
    is_match: bool = active_sheet.freeze_panes == rules["position"]
    return float(is_match)
|
||||
|
||||
def check_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
    """Compare the zoom scale of the active sheet against a reference value.

    Args:
        result (str): path to the xlsx file to load
        rules (Dict[str, Union[str, Number]]): dict like
          {
            "relation": str, name of a function in the `operator` module
              (e.g. "lt") used as the comparison
            "ref_value": Number, the value to compare the zoom scale with
          }

    Return:
        float: float() of `operator.<relation>(zoom_scale, ref_value)`
    """

    sheet = openpyxl.load_workbook(filename=result).active

    # A falsy zoomScale (e.g. None) falls back to 100.
    zoom: Number = sheet.sheet_view.zoomScale
    if not zoom:
        zoom = 100.

    compare = getattr(operator, rules["relation"])
    return float(compare(zoom, rules["ref_value"]))
|
||||
|
||||
if __name__ == '__main__':
|
||||
# path1 = ""
|
||||
@@ -132,6 +154,38 @@ if __name__ == '__main__':
|
||||
# ]
|
||||
# print(check_sheet_list(path1, rule))
|
||||
|
||||
path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
|
||||
path2 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
|
||||
print(compare_with_charts(path1, path2, chart_props=["type", "direction"]))
|
||||
#path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
|
||||
#path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
|
||||
#print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"]))
|
||||
|
||||
#path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx"
|
||||
#path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx"
|
||||
#workbook1: Workbook = openpyxl.load_workbook(filename=path1)
|
||||
#worksheet1: Worksheet = workbook1.active
|
||||
#
|
||||
#import itertools
|
||||
#for col, r in itertools.product( ['A', 'B', 'C']
|
||||
#, range(1, 9)
|
||||
#):
|
||||
#position: str = "{:}{:d}".format(col, r)
|
||||
#print(worksheet1[position])
|
||||
#print(worksheet1[position].value)
|
||||
#print(worksheet1[position].number_format)
|
||||
#print(compare_table(path1, path2, features=["number_format"]))
|
||||
|
||||
path1 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells_gold.xlsx"
|
||||
path2 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells.xlsx"
|
||||
#workbook1: Workbook = openpyxl.load_workbook(filename=path1)
|
||||
#worksheet1: Worksheet = workbook1.active
|
||||
#print(worksheet1.sheet_view.zoomScale)
|
||||
#print(type(worksheet1.sheet_view.zoomScale))
|
||||
#
|
||||
#import os
|
||||
#import os.path
|
||||
#for wb in filter( lambda f: f.endswith(".xlsx")
|
||||
#, os.listdir("../../任务数据/LibreOffice Calc/")
|
||||
#):
|
||||
#path = os.path.join("../../任务数据/LibreOffice Calc/", wb)
|
||||
#print(wb, openpyxl.load_workbook(filename=path).active.sheet_view.zoomScale)
|
||||
print(check_zoom(path1, {"relation": "lt", "ref_value": 100}))
|
||||
print(check_zoom(path2, {"relation": "lt", "ref_value": 100}))
|
||||
|
||||
@@ -56,10 +56,10 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
|
||||
# type: "scatterChart" | "lineChart" | "barChart"
|
||||
# direction: "bar" (hori) | "col" (vert)
|
||||
# xtitle, ytitle, ztitle: str
|
||||
def load_charts(xlsx_file: str, **options) -> Dict[str, Any]:
|
||||
def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
|
||||
"""
|
||||
Args:
|
||||
xlsx_file (str): path to xlsx
|
||||
xlsx_file (Workbook): concerned excel book
|
||||
options (Dict[str, List[str]]): dict like {"chart_props": list of str}
|
||||
giving the concerned chart properties
|
||||
|
||||
@@ -67,8 +67,8 @@ def load_charts(xlsx_file: str, **options) -> Dict[str, Any]:
|
||||
Dict[str, Any]: information of charts
|
||||
"""
|
||||
|
||||
workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
|
||||
worksheet: Worksheet = workbook.active
|
||||
#workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
|
||||
worksheet: Worksheet = xlsx_file.active
|
||||
charts: List[ChartBase] = worksheet._charts
|
||||
|
||||
chart_set: Dict[str, Any] = {}
|
||||
|
||||
Reference in New Issue
Block a user