Merge main

This commit is contained in:
BlankCheng
2024-01-29 21:51:26 +08:00
135 changed files with 2393 additions and 1280 deletions

View File

@@ -2,7 +2,7 @@ from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, g
get_shortcuts_on_desktop, get_history, get_enabled_experiments, get_chrome_language, get_chrome_font_size, \
get_profile_name, get_number_of_search_results, get_googledrive_file, get_active_tab_info
from .file import get_cloud_file, get_vm_file, get_cache_file
from .general import get_vm_command_line
from .general import get_vm_command_line, get_vm_terminal_output
from .impress import get_audio_in_slide
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
from .misc import get_rule, get_accessibility_tree

View File

@@ -18,3 +18,7 @@ def get_vm_command_line(env, config: Dict[str, str]):
else:
logger.error("Failed to get vm command line. Status code: %d", response.status_code)
return None
def get_vm_terminal_output(env, config: Dict[str, str]):
    """Return the terminal output reported by the environment's controller.

    Args:
        env: Object exposing ``controller.get_terminal_output()``.
        config: Getter configuration. It is accepted but not read by this
            function.

    Returns:
        Whatever ``env.controller.get_terminal_output()`` returns, passed
        through unchanged.
    """
    controller = env.controller
    terminal_output = controller.get_terminal_output()
    return terminal_output

View File

@@ -10,15 +10,17 @@ from .general import exact_match, fuzzy_match
from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
compare_insert_equation, compare_highlighted_text
from .docs import is_first_line_centered, check_file_exists, compare_contains_image
from .general import check_csv, check_accessibility_tree, run_sqlite3, check_json
from .general import check_csv, check_accessibility_tree, run_sqlite3, check_json, check_list
from .general import exact_match, fuzzy_match, check_include_exclude
from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
from .gimp import compare_images
from .libreoffice import check_libre_locale
from .pdf import check_pdf_pages
from .slides import check_presenter_console_disable, check_image_stretch_and_center, check_slide_numbers_color, \
compare_pptx_files, check_strikethrough, \
check_slide_orientation_Portrait, evaluate_presentation_fill_to_rgb_distance, check_left_panel
from .table import compare_table
check_slide_orientation_Portrait, evaluate_presentation_fill_to_rgb_distance, check_left_panel, check_transition
# from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom, check_data_validations
from .table import compare_table, compare_csv
from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
from .gimp import (
check_brightness_decrease_and_structure_sim,
@@ -40,4 +42,4 @@ from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, co
check_qt_slider_colours, check_global_key_play_pause
from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, \
check_json_keybindings
from .os import check_gnome_favorite_apps, is_utc_0, check_text_enlarged, check_moved_jpgs
from .basic_os import check_gnome_favorite_apps, is_utc_0, check_text_enlarged, check_moved_jpgs

View File

@@ -1,6 +1,8 @@
import logging
import os
import xml.etree.ElementTree as ET
import zipfile
import re
from typing import List, Dict, Any
from docx import Document
@@ -48,24 +50,30 @@ def contains_page_break(docx_file):
return 0
def compare_docx_files(file1, file2):
def compare_docx_files(file1, file2, ignore_blanks=True):
doc1 = Document(file1)
doc2 = Document(file2)
doc1_paragraphs = [p.text for p in doc1.paragraphs]
doc2_paragraphs = [p.text for p in doc2.paragraphs]
if len(doc1_paragraphs) != len(doc2_paragraphs):
# print(len(doc1_paragraphs))
# print(len(doc2_paragraphs))
return 0
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if p1 != p2:
# print(p1)
# print(p2)
if ignore_blanks:
text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
if text1 != text2:
return 0
else:
if len(doc1_paragraphs) != len(doc2_paragraphs):
# print(len(doc1_paragraphs))
# print(len(doc2_paragraphs))
return 0
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if p1 != p2:
# print(p1)
# print(p2)
return 0
return 1
@@ -245,16 +253,26 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
def check_highlighted_words(file_path1, file_path2):
if not compare_docx_files(file_path1, file_path2):
# if not compare_docx_files(file_path1, file_path2):
# return 0
# Extract content.xml from the .odt file
extract_dir = file_path1 + "_extracted"
with zipfile.ZipFile(file_path1, 'r') as zip_ref:
zip_ref.extractall(extract_dir)
content_xml_path = os.path.join(extract_dir, 'content.xml')
with open(content_xml_path, 'r', encoding="utf-8") as file:
content_xml = file.read()
# Check for yellow highlights in the content.xml
yellow_highlight_pattern = re.compile(r'(.{0,50}background-color="#ffff00"[^>]*>.{0,50})')
yellow_highlight_matches = yellow_highlight_pattern.findall(content_xml)
# Return 0 if yellow highlights are found, otherwise 1
if yellow_highlight_matches:
return 0
document = Document(file_path1)
for paragraph in document.paragraphs:
for run in paragraph.runs:
if run.font.highlight_color is not None:
return 0 # Highlighted words found
return 1 # No highlighted words found
else:
return 1
def evaluate_strike_through_last_paragraph(file_path1, file_path2):
@@ -415,4 +433,4 @@ def compare_highlighted_text(file1, file2):
doc2_highlighted = extract_highlighted_text(Document(file2))
# Compare the sets of highlighted text to check if they are the same
return set(doc1_highlighted) == set(doc2_highlighted)
return set(doc1_highlighted) == set(doc2_highlighted)

View File

@@ -1,6 +1,23 @@
import os
from PIL import Image, ImageStat, ImageChops
from typing import List, Union
from skimage.metrics import structural_similarity as ssim
from PIL import Image, ImageChops, ImageStat
def compare_images(pred_img_path_list: Union[str, List[str]],
                   gold_img_path_list: Union[str, List[str]]) -> float:
    """Check that every predicted image is pixel-identical to its gold image.

    Args:
        pred_img_path_list: A single image path or a list of image paths
            produced by the task. ``None`` is scored as a failure.
        gold_img_path_list: The reference path(s), paired with the predicted
            ones by position. ``None`` is scored as a failure.

    Returns:
        1.0 when both inputs hold the same number of images and every pair
        has the same mode, the same size and identical pixel data in every
        band; 0.0 otherwise.
    """
    if pred_img_path_list is None or gold_img_path_list is None:
        return 0.0

    # Normalise each argument independently so that a bare path on one side
    # and a list on the other are still paired correctly.
    if not isinstance(pred_img_path_list, list):
        pred_img_path_list = [pred_img_path_list]
    if not isinstance(gold_img_path_list, list):
        gold_img_path_list = [gold_img_path_list]

    # zip() silently drops unmatched trailing items, which would let a
    # missing prediction pass; treat a count mismatch as a failure instead.
    if len(pred_img_path_list) != len(gold_img_path_list):
        return 0.0

    for pred_img_path, gold_img_path in zip(pred_img_path_list, gold_img_path_list):
        # Context managers release the underlying file handles promptly.
        with Image.open(pred_img_path) as pred_img, Image.open(gold_img_path) as gold_img:
            # ImageChops.difference raises on images that do not match in
            # mode/size; such a pair can never be identical anyway.
            if pred_img.mode != gold_img.mode or pred_img.size != gold_img.size:
                return 0.0
            diff = ImageChops.difference(pred_img, gold_img)
            # Inspect each band separately: getbbox() on an image with an
            # alpha channel may only consider the alpha band, which would
            # hide colour differences between two RGBA images.
            if any(band.getbbox() for band in diff.split()):
                return 0.0
    return 1.0
def get_gimp_export_path():

View File

@@ -6,6 +6,8 @@ from pypdf import PdfReader
def check_pdf_pages(pdf_file: str, rules: Dict[str, Any]) -> float:
    """Score a PDF by comparing its page count against a reference value.

    Args:
        pdf_file: Path of the PDF to inspect, or ``None`` when no file is
            available.
        rules: Must provide ``"relation"``, the name of a function looked up
            in the ``operator`` module, and ``"ref_value"``, the value the
            page count is compared with.

    Returns:
        0.0 when ``pdf_file`` is ``None``; otherwise the result of
        ``operator.<relation>(page_count, ref_value)`` converted to float.
    """
    if pdf_file is None:
        return 0.0

    page_count: int = len(PdfReader(pdf_file).pages)
    compare = getattr(operator, rules["relation"])
    verdict = compare(page_count, rules["ref_value"])
    return float(verdict)

View File

@@ -1,5 +1,6 @@
import logging
import xml.etree.ElementTree as ET
import zipfile
from math import sqrt
from pptx import Presentation
@@ -57,9 +58,9 @@ def check_image_stretch_and_center(modified_ppt, original_ppt):
abs(the_modified_image.height - original_pres.slide_height) > Inches(0.1) or
abs(the_modified_image.left - (original_pres.slide_width - the_modified_image.width) / 2) > Inches(0.1) or
abs(the_modified_image.top - (original_pres.slide_height - the_modified_image.height) / 2) > Inches(0.1)):
return False
return 0.
return True
return 1.
def is_red_color(color):
@@ -97,6 +98,39 @@ def check_slide_numbers_color(pptx_file_path):
return 1 if font_color is not None and is_red_color(font_color) else 0
# import numpy as np
# from PIL import Image
# from skimage.metrics import structural_similarity as ssim
# def compare_images(image1_path, image2_path):
# # You would call this function with the paths to the two images you want to compare:
# # score = compare_images('path_to_image1', 'path_to_image2')
# # print("Similarity score:", score)
# if not image1_path or not image2_path:
# return 0
# # Open the images and convert to grayscale
# image1 = Image.open(image1_path).convert('L')
# image2 = Image.open(image2_path).convert('L')
# # Resize images to the smaller one's size for comparison
# image1_size = image1.size
# image2_size = image2.size
# new_size = min(image1_size, image2_size)
# image1 = image1.resize(new_size, Image.Resampling.LANCZOS)
# image2 = image2.resize(new_size, Image.Resampling.LANCZOS)
# # Convert images to numpy arrays
# image1_array = np.array(image1)
# image2_array = np.array(image2)
# # Calculate SSIM between two images
# similarity_index = ssim(image1_array, image2_array)
# return similarity_index
def compare_pptx_files(file1_path, file2_path, **options):
# todo: not strictly match since not all information is compared because we cannot get the info through pptx
prs1 = Presentation(file1_path)
@@ -111,52 +145,58 @@ def compare_pptx_files(file1_path, file2_path, **options):
examine_font_italic = options.get("examine_font_italic", True)
examine_color_rgb = options.get("examine_color_rgb", True)
examine_font_underline = options.get("examine_font_underline", True)
examine_strike_through = options.get("examine_strike_through", True)
# compare the number of slides
if len(prs1.slides) != len(prs2.slides) and examine_number_of_slides:
return False
return 0
# compare the content of each slide
for slide1, slide2 in zip(prs1.slides, prs2.slides):
# check if the shapes are the same
for shape1, shape2 in zip(slide1.shapes, slide2.shapes):
if (
shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
return False
return 0
if hasattr(shape1, "text") and hasattr(shape2, "text"):
if shape1.text != shape2.text and examine_text:
return False
return 0
# check if the paragraphs are the same
for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
# check if the runs are the same
for run1, run2 in zip(para1.runs, para2.runs):
if run1.text != run2.text and examine_text:
return False
return 0
# check if the font properties are the same
if run1.font.name != run2.font.name and examine_font_name:
return False
return 0
if run1.font.size != run2.font.size and examine_font_size:
return False
return 0
if run1.font.bold != run2.font.bold and examine_font_bold:
return False
return 0
if run1.font.italic != run2.font.italic and examine_font_italic:
return False
return 0
if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb:
return False
return 0
if run1.font.underline != run2.font.underline and examine_font_underline:
return False
return 0
if ('strike' in run1.font._element.attrib) != (
'strike' in run2.font._element.attrib) and examine_strike_through:
return 0
# fixme: Actually there are more properties to be compared, but we cannot get them through pptx
return True
return 1
def check_strikethrough(pptx_path, rules):
@@ -167,21 +207,27 @@ def check_strikethrough(pptx_path, rules):
shape_index_s = rules["shape_index_s"]
paragraph_index_s = rules["paragraph_index_s"]
for slide_index in slide_index_s:
# Get the slide
slide = presentation.slides[slide_index]
try:
for slide_index in slide_index_s:
# Get the slide
slide = presentation.slides[slide_index]
for shape_index in shape_index_s:
# Get the text box
paragraphs = slide.shapes[shape_index].text_frame.paragraphs
for shape_index in shape_index_s:
# Get the text box
paragraphs = slide.shapes[shape_index].text_frame.paragraphs
for paragraph_index in paragraph_index_s:
paragraph = paragraphs[paragraph_index]
run = paragraph.runs[0]
if 'strike' not in run.font._element.attrib:
return False
for paragraph_index in paragraph_index_s:
paragraph = paragraphs[paragraph_index]
run = paragraph.runs[0]
if 'strike' not in run.font._element.attrib:
return 0
return True
except Exception as e:
logger.error(f"Error: {e}")
return 0
return 1
def check_slide_orientation_Portrait(pptx_path):
@@ -221,14 +267,54 @@ def check_left_panel(accessibility_tree):
root = ET.fromstring(accessibility_tree)
for root_pane in root.iter('root-pane'):
for panel in root_pane.iter('panel'):
for split_pane in panel.iter('split-pane'):
# Get the left panel
if split_pane.attrib.get("{{{}}}parentcoord".format(namespaces['cp'])) == "(0, 0)":
# Get the visible attribute
visible = split_pane.attrib.get("{{{}}}visible".format(namespaces['st']))
if visible:
# decide if it is left panel
return 1.
for split_pane in root_pane.iter('split-pane'):
for panel in split_pane.iter('panel'):
for scroll_panel in panel.iter('scroll-pane'):
for document_frame in scroll_panel.iter('document-frame'):
# Get the left panel
panel_name = document_frame.get("name")
# visible = scroll_bar.attrib.get(f"{{{namespaces['st']}}}visible")
if panel_name == "Slides View":
# Left panel is open
return 1.0
return 0.
# Left panel is not open
return 0.0
def check_transition(pptx_file, rules):
    """Check whether a slide carries a transition of the expected type.

    Args:
        pptx_file: Path (or binary file-like object) of the .pptx archive,
            or ``None`` when no file is available.
        rules: Mapping with ``"slide_idx"`` (zero-based; slide ``N`` is read
            from the archive member ``ppt/slides/slide{N+1}.xml``) and
            ``"transition_type"`` (element name searched for, in the
            PresentationML namespace, below the slide's ``p:transition``).

    Returns:
        1.0 when the slide has a ``p:transition`` element containing a
        ``p:<transition_type>`` element; 0.0 otherwise, including when the
        file is ``None``, is not a valid zip archive, lacks the slide part,
        or the slide XML cannot be parsed.

    Raises:
        KeyError: If ``rules`` lacks ``"slide_idx"`` or ``"transition_type"``.
    """
    slide_idx = rules['slide_idx']
    transition_type = rules['transition_type']

    if pptx_file is None:
        return 0.

    # XML namespace used to resolve the ``p:`` prefix in the searches below.
    namespaces = {
        'p': 'http://schemas.openxmlformats.org/presentationml/2006/main',
    }
    slide_name = 'ppt/slides/slide{}.xml'.format(slide_idx + 1)

    try:
        # A .pptx file is a zip archive; read the slide part directly.
        with zipfile.ZipFile(pptx_file, 'r') as zip_ref:
            # ZipFile.open raises KeyError when the slide part is absent.
            with zip_ref.open(slide_name) as slide_file:
                # Parse the XML while the member is still open.
                root = ET.parse(slide_file).getroot()
    except (KeyError, zipfile.BadZipFile, ET.ParseError):
        # Missing slide, corrupt archive or malformed XML: no match.
        return 0.

    # Search for the transition element anywhere in the slide.
    transition = root.find('.//p:transition', namespaces)
    if transition is None:
        return 0.

    # Check that the transition contains the expected transition type.
    expected = transition.find('.//p:{}'.format(transition_type), namespaces)
    return 1. if expected is not None else 0.

View File

@@ -1,32 +1,35 @@
import functools
import itertools
import logging
#import operator
import os.path
# import operator
from numbers import Number
from typing import Any, Union, cast, Callable, Iterable
from typing import Dict, List, Tuple
import os.path
import itertools
import functools
import openpyxl
import pandas as pd
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
#from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.cell.cell import Cell
#from openpyxl.utils import coordinate_to_tuple
# from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.worksheet.worksheet import Worksheet
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles
from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles
# from openpyxl.utils import coordinate_to_tuple
logger = logging.getLogger("desktopenv.metric.table")
BOOK = Union[pd.ExcelFile, Workbook, str]
def _parse_sheet_idx( sheet_idx: Union[int, str]
, result: BOOK, expected: BOOK
, result_sheet_names: List[str]
, expected_sheet_names: List[str]
) -> Tuple[BOOK, str]:
def _parse_sheet_idx(sheet_idx: Union[int, str]
, result: BOOK, expected: BOOK
, result_sheet_names: List[str]
, expected_sheet_names: List[str]
) -> Tuple[BOOK, str]:
# function _parse_sheet_idx {{{ #
if isinstance(sheet_idx, int):
index: str = result_sheet_names[sheet_idx]
@@ -49,7 +52,10 @@ def _parse_sheet_idx( sheet_idx: Union[int, str]
return book, index
# }}} function _parse_sheet_idx #
SHEET = Union[pd.DataFrame, Worksheet, List[str]]
def _load_sheet(book: BOOK, index: str) -> SHEET:
# function _load_sheet {{{ #
if isinstance(book, str):
@@ -57,12 +63,12 @@ def _load_sheet(book: BOOK, index: str) -> SHEET:
csv_name: str = "{:}-{:}.csv".format(os.path.splitext(book)[0], index)
with open(csv_name) as f:
csv_lines: List[str] = list( itertools.dropwhile( lambda l: len(l)==0
, map( lambda l: l.strip()
, reversed(f.read().splitlines())
)
csv_lines: List[str] = list(itertools.dropwhile(lambda l: len(l) == 0
, map(lambda l: l.strip()
, reversed(f.read().splitlines())
)
)
)
)
return csv_lines
if isinstance(book, pd.ExcelFile):
return pd.read_excel(book, index)
@@ -72,7 +78,8 @@ def _load_sheet(book: BOOK, index: str) -> SHEET:
raise NotImplementedError("Not supported workbook format")
# }}} function _load_sheet #
def compare_table(result: str, expected: str, **options) -> float:
def compare_table(result: str, expected: str = None, **options) -> float:
# function compare_table {{{ #
"""
Args:
@@ -92,25 +99,35 @@ def compare_table(result: str, expected: str, **options) -> float:
if result is None:
return 0.
xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
pdworkbookr = pd.ExcelFile(result)
try:
xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
pdworkbookr = pd.ExcelFile(result)
except:
return 0.
worksheetr_names: List[str] = pdworkbookr.sheet_names
xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
pdworkbooke = pd.ExcelFile(expected)
worksheete_names: List[str] = pdworkbooke.sheet_names
if expected is not None:
parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] =\
functools.partial( _parse_sheet_idx
, result_sheet_names=worksheetr_names
, expected_sheet_names=worksheete_names
)
xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
pdworkbooke = pd.ExcelFile(expected)
worksheete_names: List[str] = pdworkbooke.sheet_names
else:
xlworkbooke: Workbook = None
pdworkbooke = None
worksheete_names: List[str] = None
parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] = \
functools.partial(
_parse_sheet_idx,
result_sheet_names=worksheetr_names,
expected_sheet_names=worksheete_names
)
passes = True
for r in options["rules"]:
if r["type"] == "sheet_name":
# Compare Sheet Names {{{ #
metric: bool = worksheetr_names==worksheete_names
metric: bool = worksheetr_names == worksheete_names
logger.debug("Assertion: %s.sheet_names == %s.sheet_names - %s", result, expected, metric)
# }}} Compare Sheet Names #
@@ -171,8 +188,8 @@ def compare_table(result: str, expected: str, **options) -> float:
styles1: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
styles2: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
#number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
#number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
# number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
# number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
metric: bool = styles1 == styles2
logger.debug("Assertion: %s.style == %s.style - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Style (Also Conditional Formatiing) #
@@ -185,11 +202,11 @@ def compare_table(result: str, expected: str, **options) -> float:
sheet1: Worksheet = _load_sheet(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke))
sheet2: Worksheet = _load_sheet(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke))
metric: bool = sheet1.freeze_panes == sheet2.freeze_panes
logger.debug( "Assertion: %s.freeze(%s) == %s.freeze(%s) - %s"
, r["sheet_idx0"], sheet1.freeze_panes
, r["sheet_idx1"], sheet2.freeze_panes
, metric
)
logger.debug("Assertion: %s.freeze(%s) == %s.freeze(%s) - %s"
, r["sheet_idx0"], sheet1.freeze_panes
, r["sheet_idx1"], sheet2.freeze_panes
, metric
)
# }}} Compare Freezing #
elif r["type"] == "zoom":
@@ -201,7 +218,8 @@ def compare_table(result: str, expected: str, **options) -> float:
sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
zoom_scale: Number = sheet.sheet_view.zoomScale or 100.
metric: bool = _match_value_to_rule(zoom_scale, r)
logger.debug("Assertion: %s.zoom(%.1f) %s %.1f - %s", r["sheet_idx"], zoom_scale, r["method"], r["ref"], metric)
logger.debug("Assertion: %s.zoom(%.1f) %s %.1f - %s", r["sheet_idx"], zoom_scale, r["method"], r["ref"],
metric)
# }}} Check Zooming #
elif r["type"] == "data_validation":
@@ -228,15 +246,15 @@ def compare_table(result: str, expected: str, **options) -> float:
sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
data_validators: List[DataValidation] = sheet.data_validations.dataValidation
total_metric = len(data_validators)>=len(r["dv_props"])
total_metric = len(data_validators) >= len(r["dv_props"])
for dat_vldt in data_validators:
metric = False
for prpt in r["dv_props"]:
metric = metric or all( _match_value_to_rule( getattr(dat_vldt, attrbt)
metric = metric or all(_match_value_to_rule(getattr(dat_vldt, attrbt)
, mr
)\
for attrbt, mr in prpt.items()
)
) \
for attrbt, mr in prpt.items()
)
if metric:
break
total_metric = total_metric and metric
@@ -253,14 +271,14 @@ def compare_table(result: str, expected: str, **options) -> float:
# sheet_idx1: as sheet_idx0
# props: list of str, see utils.load_rows_or_cols
rows1: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
, obj="row"
, **r
)
rows2: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
, obj="row"
, **r
)
rows1: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
, obj="row"
, **r
)
rows2: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
, obj="row"
, **r
)
logger.debug("Rows1: %s", repr(rows1))
logger.debug("Rows2: %s", repr(rows2))
metric: bool = rows1 == rows2
@@ -273,14 +291,14 @@ def compare_table(result: str, expected: str, **options) -> float:
# sheet_idx1: as sheet_idx0
# props: list of str, see utils.load_rows_or_cols
cols1: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
, obj="column"
, **r
)
cols2: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
, obj="column"
, **r
)
cols1: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
, obj="column"
, **r
)
cols2: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
, obj="column"
, **r
)
metric: bool = cols1 == cols2
logger.debug("Assertion: %s[cols] == %s[cols] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Check Row Properties #
@@ -293,21 +311,21 @@ def compare_table(result: str, expected: str, **options) -> float:
# supported attributes: value & those supported by utils._read_cell_style
sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
#data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
# data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
cell: Cell = sheet[r["coordinate"]]
metric: bool = True
for prpt, rule in r["props"].items():
if prpt=="value":
if prpt == "value":
val = read_cell_value(*parse_idx(r["sheet_idx"], result, expected), r["coordinate"])
else:
val = _read_cell_style(prpt, cell)
metric = metric and _match_value_to_rule(val, rule)
logger.debug( "Assertion: %s[%s] :%s - %s"
, r["sheet_idx"], r["coordinate"]
, repr(r["props"]), metric
)
logger.debug("Assertion: %s[%s] :%s - %s"
, r["sheet_idx"], r["coordinate"]
, repr(r["props"]), metric
)
# }}} Check Cell Properties #
else:
@@ -320,6 +338,7 @@ def compare_table(result: str, expected: str, **options) -> float:
return float(passes)
# }}} function compare_table #
def compare_csv(result: str, expected: str, **options) -> float:
if result is None:
return 0.
@@ -335,9 +354,10 @@ def compare_csv(result: str, expected: str, **options) -> float:
result_lines = map(str.lower, result_lines)
expected_lines = map(str.lower, expected_lines)
metric: bool = list(result_lines)==list(expected_lines)
metric: bool = list(result_lines) == list(expected_lines)
return float(metric)
if __name__ == '__main__':
import datetime
import sys
@@ -357,7 +377,8 @@ if __name__ == '__main__':
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
formatter = logging.Formatter(
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
@@ -373,49 +394,49 @@ if __name__ == '__main__':
path1 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number.xlsx"
path2 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number_gold.xlsx"
rules = [ { "type": "check_cell"
, "sheet_idx": 0
, "coordinate": "E3"
, "props": { "value": { "method": "approx:0.001"
, "ref": 191.6667
}
}
rules = [{"type": "check_cell"
, "sheet_idx": 0
, "coordinate": "E3"
, "props": {"value": {"method": "approx:0.001"
, "ref": 191.6667
}
}
}
]
print( compare_table( path1, path2
]
print(compare_table(path1, path2
, rules=rules
)
)
print( compare_table( path2, path2
)
print(compare_table(path2, path2
, rules=rules
)
)
)
# Row Properties
#path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx"
#path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx"
#workbook: Workbook = openpyxl.load_workbook(filename=path1)
#worksheet: Worksheet = workbook.active
#for r_no, dms in worksheet.column_dimensions.items():
#print(r_no, type(r_no), type(dms), dms.hidden)
# path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx"
# path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx"
# workbook: Workbook = openpyxl.load_workbook(filename=path1)
# worksheet: Worksheet = workbook.active
# for r_no, dms in worksheet.column_dimensions.items():
# print(r_no, type(r_no), type(dms), dms.hidden)
# Conditional Formats
#import formulas
#path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
#path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
#path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx"
#workbook: Workbook = openpyxl.load_workbook(filename=path2)
#worksheet: Worksheet = workbook.active
#print(worksheet.conditional_formatting)
#for itm in worksheet.conditional_formatting:
#print(itm.cells)
#for r in itm.rules:
#print( r.type, r.formula, r.dxf.font.color.rgb
#, r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb
#)
#condition = formulas.Parser().ast("=" + r.formula[0])[1].compile()
##print(r.type, r.operator, r.dxfId, r.dxf)
#for r in itm.cells:
#for c in r.cells:
#value = worksheet.cell(row=c[0], column=c[1]).value
#print(value, condition(str(value)))
# import formulas
# path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
# path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
# path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx"
# workbook: Workbook = openpyxl.load_workbook(filename=path2)
# worksheet: Worksheet = workbook.active
# print(worksheet.conditional_formatting)
# for itm in worksheet.conditional_formatting:
# print(itm.cells)
# for r in itm.rules:
# print( r.type, r.formula, r.dxf.font.color.rgb
# , r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb
# )
# condition = formulas.Parser().ast("=" + r.formula[0])[1].compile()
##print(r.type, r.operator, r.dxfId, r.dxf)
# for r in itm.cells:
# for c in r.cells:
# value = worksheet.cell(row=c[0], column=c[1]).value
# print(value, condition(str(value)))

View File

@@ -52,14 +52,17 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
"""
# read xlsx
with zipfile.ZipFile(xlsx_file, "r") as z_f:
with z_f.open("xl/workbook.xml") as f:
workbook_database: _Element = lxml.etree.fromstring(f.read())
sheets: List[_Element] = _sheet_name_selector(workbook_database)
sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
sheet: _Element = lxml.etree.fromstring(f.read())
sparklines: List[_Element] = _sparklines_selector(sheet)
try:
with zipfile.ZipFile(xlsx_file, "r") as z_f:
with z_f.open("xl/workbook.xml") as f:
workbook_database: _Element = lxml.etree.fromstring(f.read())
sheets: List[_Element] = _sheet_name_selector(workbook_database)
sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
sheet: _Element = lxml.etree.fromstring(f.read())
sparklines: List[_Element] = _sparklines_selector(sheet)
except zipfile.BadZipFile:
return {}
sparklines_dict: Dict[str, str] = {}
for sp_l in sparklines:
@@ -158,29 +161,32 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# read_cell_value {{{ #
with zipfile.ZipFile(xlsx_file, "r") as z_f:
try:
with z_f.open("xl/sharedStrings.xml") as f:
shared_str_xml: _Element = lxml.etree.fromstring(f.read())
str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
shared_strs: List[str] = [elm.text for elm in str_elements]
except:
logger.debug("Read shared strings error: %s", xlsx_file)
try:
with zipfile.ZipFile(xlsx_file, "r") as z_f:
try:
with z_f.open("xl/sharedStrings.xml") as f:
shared_str_xml: _Element = lxml.etree.fromstring(f.read())
str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
shared_strs: List[str] = [elm.text for elm in str_elements]
except:
logger.debug("Read shared strings error: %s", xlsx_file)
with z_f.open("xl/workbook.xml") as f:
workbook_database: _Element = lxml.etree.fromstring(f.read())
sheets: List[_Element] = _sheet_name_selector(workbook_database)
sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
with z_f.open("xl/workbook.xml") as f:
workbook_database: _Element = lxml.etree.fromstring(f.read())
sheets: List[_Element] = _sheet_name_selector(workbook_database)
sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
sheet: _Element = lxml.etree.fromstring(f.read())
cells: List[_Element] =\
lxml.cssselect.CSSSelector( 'oo|row>oo|c[r="{:}"]'.format(coordinate)
, namespaces=_xlsx_ns_mapping
)(sheet)
if len(cells)==0:
return None
cell: _Element = cells[0]
with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
sheet: _Element = lxml.etree.fromstring(f.read())
cells: List[_Element] =\
lxml.cssselect.CSSSelector( 'oo|row>oo|c[r="{:}"]'.format(coordinate)
, namespaces=_xlsx_ns_mapping
)(sheet)
if len(cells)==0:
return None
cell: _Element = cells[0]
except zipfile.BadZipFile:
return None
cell: Dict[str, str] = xmltodict.parse( lxml.etree.tostring(cell, encoding="unicode")
, process_namespaces=True