This commit is contained in:
tsuky_chen
2024-01-23 22:02:27 +08:00
34 changed files with 906 additions and 239 deletions

View File

@@ -7,3 +7,4 @@ from .misc import get_rule, get_accessibility_tree
from .replay import get_replay
from .vlc import get_vlc_playing_info, get_vlc_config
from .vscode import get_vscode_config
from .impress import get_audio_in_slide

View File

@@ -5,18 +5,18 @@ from .docs import find_default_font, contains_page_break, compare_docx_files, co
from .docs import is_first_line_centered, check_file_exists, compare_contains_image
from .docs import evaluate_colored_words_in_tables, check_highlighted_words, evaluate_strike_through_last_paragraph, \
evaluate_conversion, evaluate_spacing, check_italic_font_size_14, evaluate_alignment, get_unique_train_ids, \
check_no_duplicates
from .general import exact_match, fuzzy_match, check_csv, check_accessibility_tree, check_list
check_no_duplicates
from .general import exact_match, fuzzy_match
from .general import check_csv, check_accessibility_tree, run_sqlite3, check_json
from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
from .slides import check_presenter_console_disable, check_image_stretch_and_center, check_slide_numbers_color, compare_pptx_files, check_strikethrough, \
check_slide_orientation_Portrait, evaluate_presentation_fill_to_rgb_distance, check_left_panel
from .libreoffice import check_libre_locale
from .pdf import check_pdf_pages
#from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom, check_data_validations
from .table import compare_table
from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \
compare_videos
from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3, check_json
from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed, check_json_settings, check_json_keybindings
from .impress import check_image_stretch_and_center, check_slide_numbers_color, compare_pptx_files, check_strikethrough, \
check_for_audio, check_formula_shape
from .impress import check_slide_orientation_Portrait, contains_mp4_video

View File

@@ -1,6 +1,36 @@
import logging
import xml.etree.ElementTree as ET
from math import sqrt
from pptx import Presentation
from pptx.util import Inches
logger = logging.getLogger("desktopenv.metric.slides")
def check_presenter_console_disable(config_file_path):
    """Check whether the Impress presenter console is disabled in a config file.

    The XML file at ``config_file_path`` is parsed and searched for the
    ``EnablePresenterScreen`` property located under the
    ``/org.openoffice.Office.Impress/Misc/Start`` item. Only the first
    matching property is considered.

    Args:
        config_file_path: path of the XML registry file to inspect.

    Returns:
        ``1.`` if the property's value is ``false`` (case-insensitive,
        surrounding whitespace ignored), ``0.`` otherwise. ``0.`` is also
        returned when the file cannot be read or parsed, when the property is
        absent, or when it carries no value.
    """
    namespaces = {
        'oor': 'http://openoffice.org/2001/registry'
    }
    prop_path = (
        ".//item[@oor:path='/org.openoffice.Office.Impress/Misc/Start']"
        "/prop[@oor:name='EnablePresenterScreen']"
    )

    try:
        root = ET.parse(config_file_path).getroot()
        prop = root.find(prop_path, namespaces)
    except Exception as e:
        # This is a metric: any failure to read the file scores 0 rather than
        # propagating to the evaluator.
        logger.error(f"Error: {e}")
        return 0.

    if prop is None:
        # The setting is not present in the file at all.
        return 0.

    value = prop.find('value')
    if value is None or value.text is None:
        # Handle a property without a usable <value> explicitly instead of
        # relying on an AttributeError being swallowed.
        logger.error("Error: EnablePresenterScreen property has no value")
        return 0.

    # Tolerate pretty-printed XML where the text is padded with whitespace.
    if value.text.strip().lower() == 'false':
        return 1.
    return 0.
def check_image_stretch_and_center(modified_ppt, original_ppt):
# fixme: this func is overfit to this example libreoffice_impress
@@ -72,32 +102,60 @@ def compare_pptx_files(file1_path, file2_path, **options):
prs1 = Presentation(file1_path)
prs2 = Presentation(file2_path)
examine_number_of_slides = options.get("examine_number_of_slides", True)
examine_shape = options.get("examine_shape", True)
examine_text = options.get("examine_text", True)
examine_font_name = options.get("examine_font_name", True)
examine_font_size = options.get("examine_font_size", True)
examine_font_bold = options.get("examine_font_bold", True)
examine_font_italic = options.get("examine_font_italic", True)
examine_color_rgb = options.get("examine_color_rgb", True)
examine_font_underline = options.get("examine_font_underline", True)
# compare the number of slides
if len(prs1.slides) != len(prs2.slides):
if len(prs1.slides) != len(prs2.slides) and examine_number_of_slides:
return False
# compare the content of each slide
for slide1, slide2 in zip(prs1.slides, prs2.slides):
# check if the shapes are the same
for shape1, shape2 in zip(slide1.shapes, slide2.shapes):
if shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
if (
shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
return False
if hasattr(shape1, "text") and hasattr(shape2, "text"):
if shape1.text != shape2.text:
if shape1.text != shape2.text and examine_text:
return False
# check if the paragraphs are the same
for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
# check if the runs are the same
for run1, run2 in zip(para1.runs, para2.runs):
if run1.text != run2.text:
if run1.text != run2.text and examine_text:
return False
# check if the font properties are the same
if run1.font.name != run2.font.name or run1.font.size != run2.font.size or run1.font.bold != run2.font.bold or run1.font.italic != run2.font.italic or run1.font.color.rgb != run2.font.color.rgb:
if run1.font.name != run2.font.name and examine_font_name:
return False
if run1.font.size != run2.font.size and examine_font_size:
return False
if run1.font.bold != run2.font.bold and examine_font_bold:
return False
if run1.font.italic != run2.font.italic and examine_font_italic:
return False
if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb:
return False
if run1.font.underline != run2.font.underline and examine_font_underline:
return False
# fixme: Actually there are more properties to be compared, but we cannot get them through pptx
return True
@@ -126,32 +184,6 @@ def check_strikethrough(pptx_path, rules):
return True
def has_audio_on_page(slide):
    """Tell whether ``slide`` contains at least one shape of type 13.

    Args:
        slide: object exposing an iterable ``shapes`` attribute whose items
            have a ``shape_type`` attribute.

    Returns:
        ``True`` as soon as a shape with ``shape_type == 13`` is found,
        ``False`` if there is none (including when the slide has no shapes).
    """
    # NOTE(review): 13 looks like python-pptx's MSO_SHAPE_TYPE.PICTURE, i.e.
    # any picture-like shape would count as audio here - confirm this is the
    # intended way to detect embedded audio.
    return any(shape.shape_type == 13 for shape in slide.shapes)
def check_for_audio(prs):
    """Check whether any slide of a presentation passes ``has_audio_on_page``.

    Args:
        prs: value handed to ``Presentation(...)`` to open the presentation
            (presumably a path to a .pptx file - confirm against callers).

    Returns:
        ``1`` if ``has_audio_on_page`` is true for at least one slide,
        ``0`` otherwise.
    """
    presentation = Presentation(prs)
    if any(has_audio_on_page(slide) for slide in presentation.slides):
        return 1
    return 0
def check_formula_shape(prs, slide_index=13):
    """Check that a given slide holds a text-bearing shape of type 1.

    Args:
        prs: value handed to ``Presentation(...)`` to open the presentation
            (presumably a path to a .pptx file - confirm against callers).
        slide_index: zero-based index of the slide to inspect. Defaults to
            13, the index that used to be hard-coded.

    Returns:
        ``1`` if the selected slide has a shape with a text frame whose
        ``shape_type`` equals 1, ``0`` otherwise. ``0`` is also returned when
        the presentation has no slide at ``slide_index``.
    """
    presentation = Presentation(prs)
    try:
        slide = presentation.slides[slide_index]
    except IndexError:
        # A presentation that is too short cannot contain the expected shape;
        # score it as a failure instead of crashing the evaluation.
        return 0

    # NOTE(review): shape_type 1 is presumably MSO_SHAPE_TYPE.AUTO_SHAPE - confirm.
    for shape in slide.shapes:
        if shape.has_text_frame and shape.shape_type == 1:
            return 1
    return 0
def check_slide_orientation_Portrait(pptx_path):
presentation = Presentation(pptx_path)
@@ -163,20 +195,40 @@ def check_slide_orientation_Portrait(pptx_path):
return 0
def contains_mp4_video(pptx_path):
    """Check whether a presentation contains a shape of type 16 with media type 3.

    Args:
        pptx_path: value handed to ``Presentation(...)`` to open the file.

    Returns:
        ``1`` as soon as a shape with ``shape_type == 16`` and
        ``media_type == 3`` is found on any slide, ``0`` otherwise.
    """
    # NOTE(review): 16 and 3 are presumably MSO_SHAPE_TYPE.MEDIA and
    # PP_MEDIA_TYPE.MOVIE; nothing here inspects the container format, so the
    # "mp4" in the name is not actually verified - confirm.
    presentation = Presentation(pptx_path)
    for slide in presentation.slides:
        # media_type is only read for shapes whose shape_type is 16.
        if any(shape.shape_type == 16 and shape.media_type == 3 for shape in slide.shapes):
            return 1
    return 0
def evaluate_presentation_fill_to_rgb_distance(pptx_file, rules):
    """Score how close the slide background fills are to a target RGB colour.

    For each slide whose background fill has ``type == 1`` the Euclidean
    distance between the fill's foreground colour and the target colour is
    computed and normalised by the largest possible RGB distance. Slides with
    any other fill type count as the maximum distance (1).

    Args:
        pptx_file: value handed to ``Presentation(...)`` to open the file.
        rules: mapping with an ``"rgb"`` entry that unpacks into three
            numeric components (r, g, b).

    Returns:
        ``1 - mean normalised distance`` over all slides, so 1 means every
        slide matches the target colour exactly. ``0.`` is returned for a
        presentation without slides.
    """
    rgb = rules["rgb"]
    # Distance between (0, 0, 0) and (255, 255, 255): the normalisation factor.
    max_distance = sqrt(255 ** 2 + 255 ** 2 + 255 ** 2)

    def slide_fill_distance_to_rgb(_slide, _rgb):
        fill = _slide.background.fill
        # NOTE(review): fill type 1 is presumably MSO_FILL.SOLID - confirm.
        if fill.type == 1:
            r1, g1, b1 = fill.fore_color.rgb
            r2, g2, b2 = _rgb
            return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / max_distance
        return 1

    prs = Presentation(pptx_file)
    slide_count = len(prs.slides)
    if slide_count == 0:
        # Nothing to compare; avoid dividing by zero and score it as a miss.
        return 0.

    similarity = 1 - sum(slide_fill_distance_to_rgb(slide, rgb) for slide in prs.slides) / slide_count
    return similarity
def check_left_panel(accessibility_tree):
    """Check whether the left split pane is visible in an accessibility tree.

    The tree is searched for ``split-pane`` elements nested under a ``panel``
    inside a ``root-pane``. A split pane whose ``parentcoord`` component
    attribute is ``"(0, 0)"`` is taken to be the left panel.

    Args:
        accessibility_tree: XML document, as a string, describing the
            accessibility tree.

    Returns:
        ``1.`` if such a split pane carries a ``visible`` state attribute
        whose value is non-empty and not ``"false"``; ``0.`` otherwise.
    """
    namespaces = {
        'st': 'uri:deskat:state.at-spi.gnome.org',
        'cp': 'uri:deskat:component.at-spi.gnome.org'
    }
    # ElementTree stores namespaced attributes under "{uri}name" keys.
    parentcoord_key = "{{{}}}parentcoord".format(namespaces['cp'])
    visible_key = "{{{}}}visible".format(namespaces['st'])

    root = ET.fromstring(accessibility_tree)

    for root_pane in root.iter('root-pane'):
        for panel in root_pane.iter('panel'):
            for split_pane in panel.iter('split-pane'):
                # Only the pane anchored at the parent's origin is the left panel.
                if split_pane.attrib.get(parentcoord_key) != "(0, 0)":
                    continue
                visible = split_pane.attrib.get(visible_key)
                # Attribute values are strings, so "false" would be truthy;
                # reject it explicitly.
                if visible and visible.strip().lower() != "false":
                    return 1.
    return 0.


if __name__ == "__main__":
    path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx"
    presentation = Presentation(path1)

    # Debug dump: list every shape of every slide.
    for i, sl in enumerate(presentation.slides):
        for j, sh in enumerate(sl.shapes):
            print(i, j, sh, sh.name, sh.shape_type, sh.text)

View File

@@ -1,5 +1,5 @@
import logging
import operator
#import operator
from numbers import Number
from typing import Any, Union, cast, Callable
from typing import Dict, List, Tuple
@@ -14,7 +14,8 @@ from openpyxl.worksheet.worksheet import Worksheet
#from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.datavalidation import DataValidation
from .utils import load_charts, load_sparklines, _match_value_to_rule
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles
from .utils import _match_value_to_rule
logger = logging.getLogger("desktopenv.metric.table")
@@ -160,18 +161,19 @@ def compare_table(result: str, expected: str, **options) -> float:
logger.debug("Assertion: %s[chart] == %s[chart] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Charts #
elif r["type"] == "number_format":
# Compare Number Formats {{{ #
elif r["type"] == "style":
# Compare Style (Also Conditional Formatting) {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
# props: list of str indicating concerned styles
sheet1: Worksheet = _load_sheet(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke))
sheet2: Worksheet = _load_sheet(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke))
number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.data_type=="n"]
number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.data_type=="n"]
metric: bool = number_formats1 == number_formats2
logger.debug("Assertion: %s.nf == %s.nf - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Number Formats #
styles1: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
styles2: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
#number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
#number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
metric: bool = styles1 == styles2
logger.debug("Assertion: %s.style == %s.style - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Style (Also Conditional Formatting) #
elif r["type"] == "freeze":
# Compare Freezing {{{ #
@@ -203,7 +205,7 @@ def compare_table(result: str, expected: str, **options) -> float:
elif r["type"] == "data_validation":
# Check Data Validation {{{ #
# sheet_idx: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# dv_props: list of dict like {attribute: "method": str, "ref": anythin}
# dv_props: list of dict like {attribute: {"method": str, "ref": anything}}
# available attributes:
# * ranges
# * type
@@ -224,14 +226,14 @@ def compare_table(result: str, expected: str, **options) -> float:
sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
data_validators: List[DataValidation] = sheet.data_validations.dataValidation
total_metric = True
total_metric = len(data_validators)>=len(r["dv_props"])
for dat_vldt in data_validators:
metric = False
for r in r["dv_props"]:
for prpt in r["dv_props"]:
metric = metric or all( _match_value_to_rule( getattr(dat_vldt, attrbt)
, mr
)\
for attrbt, mr in r.items()
for attrbt, mr in prpt.items()
)
if metric:
break
@@ -243,6 +245,44 @@ def compare_table(result: str, expected: str, **options) -> float:
metric: bool = total_metric
# }}} Check Data Validation #
elif r["type"] == "row_props":
# Check Row Properties {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
# props: list of str, see utils.load_rows_or_cols
rows1: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
, obj="row"
, **r
)
rows2: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
, obj="row"
, **r
)
logger.debug("Rows1: %s", repr(rows1))
logger.debug("Rows2: %s", repr(rows2))
metric: bool = rows1 == rows2
logger.debug("Assertion: %s[rows] == %s[rows] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Check Row Properties #
elif r["type"] == "col_props":
# Check Column Properties {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
# props: list of str, see utils.load_rows_or_cols
cols1: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
, obj="column"
, **r
)
cols2: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
, obj="column"
, **r
)
metric: bool = cols1 == cols2
logger.debug("Assertion: %s[cols] == %s[cols] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Check Column Properties #
else:
raise NotImplementedError("Unimplemented sheet check: {:}".format(r["type"]))
@@ -254,15 +294,48 @@ def compare_table(result: str, expected: str, **options) -> float:
# }}} function compare_table #
if __name__ == '__main__':
path1 = "../../任务数据/LibreOffice Calc/Freeze_row_column.xlsx"
path2 = "../../任务数据/LibreOffice Calc/Freeze_row_column_gold.xlsx"
import datetime
import sys
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
rules = [ { "type": "sheet_data"
, "sheet_idx0": 0
, "sheet_idx1": "EI0"
}
, { "type": "freeze"
, { "type": "style"
, "sheet_idx0": 0
, "sheet_idx1": "EI0"
, "props": ["bgcolor"]
}
]
print( compare_table( path1, path2
@@ -274,17 +347,31 @@ if __name__ == '__main__':
)
)
#path = "../../任务数据/LibreOffice Calc/Order_Id_Mark_Pass_Fail_gold.xlsx"
#print( check_data_validations( path, [ { "ranges": { "method": "spreadsheet_range"
#, "ref": ["D2:D29", "D2:D1048576"]
#}
#, "type": { "method": "eq"
#, "ref": "list"
#}
#, "formula1": { "method": "str_set_eq"
#, "ref": ["Pass", "Fail", "Held"]
#}
#}
#]
#)
#)
# Row Properties
#path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx"
#path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx"
#workbook: Workbook = openpyxl.load_workbook(filename=path1)
#worksheet: Worksheet = workbook.active
#for r_no, dms in worksheet.column_dimensions.items():
#print(r_no, type(r_no), type(dms), dms.hidden)
# Conditional Formats
#import formulas
#path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
#path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
#path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx"
#workbook: Workbook = openpyxl.load_workbook(filename=path2)
#worksheet: Worksheet = workbook.active
#print(worksheet.conditional_formatting)
#for itm in worksheet.conditional_formatting:
#print(itm.cells)
#for r in itm.rules:
#print( r.type, r.formula, r.dxf.font.color.rgb
#, r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb
#)
#condition = formulas.Parser().ast("=" + r.formula[0])[1].compile()
##print(r.type, r.operator, r.dxfId, r.dxf)
#for r in itm.cells:
#for c in r.cells:
#value = worksheet.cell(row=c[0], column=c[1]).value
#print(value, condition(str(value)))

View File

@@ -1,6 +1,6 @@
import logging
import zipfile
from typing import Any, TypeVar, Union, Iterable, Optional
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
from typing import Dict, List, Set, Match
from urllib.parse import urlparse, urlunparse
import re
@@ -17,6 +17,12 @@ from openpyxl import Workbook
from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.dimensions import DimensionHolder
from openpyxl.formatting.formatting import ConditionalFormattingList
#from openpyxl.utils import get_column_letter
from openpyxl.cell.cell import Cell
from openpyxl.styles.differential import DifferentialStyle
import formulas
V = TypeVar("Value")
@@ -31,9 +37,8 @@ _xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
_sheet_name_selector = lxml.cssselect.CSSSelector("oo|sheets>oo|sheet", namespaces=_xlsx_ns_mapping)
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
# function load_sparklines {{{ #
"""
This function modifies data_frame in-place
Args:
xlsx_file (str): path to xlsx
sheet_name (str): sheet name
@@ -64,6 +69,7 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
)
sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
return sparklines_dict
# }}} function load_sparklines #
# Available Chart Properties:
@@ -75,6 +81,7 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
# direction: "bar" (hori) | "col" (vert)
# xtitle, ytitle, ztitle: str
def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
# function load_charts {{{ #
"""
Args:
xlsx_file (Workbook): concerned excel book
@@ -83,7 +90,12 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
giving the concerned chart properties
Returns:
Dict[str, Any]: information of charts
Dict[str, Any]: information of charts, dict like
{
<str representing data source>: {
<str as property>: anything
}
}
"""
# workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
@@ -140,7 +152,132 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
chart_set[series] = info
return chart_set
# }}} function load_charts #
# Supported Styles:
# number_format
# font_name - str
# font_family - float
# font_color - in aRGB, e.g., FF000000 is black
# font_bold - bool
# font_italic - bool
# fill_type - "patternFill" | "gradientFill"
# bgcolor - in aRGB, e.g., FFFF0000 is red
# fgcolor - in aRGB, e.g., FFFFFF00 is yellow
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
    """Read one style property of a cell, optionally through a differential style.

    Args:
        style_name: one of "number_format", "font_name", "font_family",
            "font_color", "font_bold", "font_italic", "fill_type", "bgcolor",
            "fgcolor".
        cell: the cell to read from.
        diff_style: differential style (e.g. from a conditional-formatting
            rule) that takes precedence over the cell's own style. When it
            does not define the requested component (its ``numFmt``, ``font``
            or ``fill`` is None) the cell's own component is used instead.

    Returns:
        The property value. ``number_format`` yields None unless the cell has
        a value and its data type is "n"; the font properties yield None for
        cells without a value; ``font_color`` also yields None when the font
        has no colour.

    Raises:
        NotImplementedError: if ``style_name`` is not supported.
    """
    def _component(name: str) -> Any:
        # Prefer the differential style's component, but fall back to the
        # cell's own one when the differential style leaves it unset.
        override = None if diff_style is None else getattr(diff_style, name)
        return getattr(cell, name) if override is None else override

    # Font properties that map directly onto an attribute of the font object.
    font_attributes = {
        "font_name": "name",
        "font_family": "family",
        "font_bold": "bold",
        "font_italic": "italic",
    }

    if style_name == "number_format":
        if cell.value is None or cell.data_type != "n":
            return None
        num_fmt = None if diff_style is None else diff_style.numFmt
        return cell.number_format if num_fmt is None else num_fmt.formatCode
    elif style_name in font_attributes:
        if cell.value is None:
            return None
        return getattr(_component("font"), font_attributes[style_name])
    elif style_name == "font_color":
        if cell.value is None:
            return None
        color = _component("font").color
        return None if color is None else color.rgb
    elif style_name == "fill_type":
        return _component("fill").tagname
    elif style_name == "bgcolor":
        return _component("fill").bgColor.rgb
    elif style_name == "fgcolor":
        return _component("fill").fgColor.rgb
    else:
        raise NotImplementedError("Unsupported Style: {:}".format(style_name))
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, List[Any]]:
    # function load_xlsx_styles {{{ #
    """
    Collects the requested style properties of every cell of a sheet, then
    overrides them for the cells selected by conditional-formatting rules.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like {"props": list of str}
          giving the concerned styles; defaults to no styles

    Returns:
        Dict[str, List[Any]]: dict like
          {
            <str as cell coordinates>: list of the concerned property
              values, in the order given by "props"
          }

    Raises:
        NotImplementedError: for a conditional-formatting rule whose type is
          not "expression"
    """

    sheet: Worksheet = xlsx_file[sheet_name]
    props: List[str] = options.get("props", [])

    # Plain cell styles
    styles: Dict[str, List[Any]] = {
        cell.coordinate: [_read_cell_style(prop, cell) for prop in props]
        for column in sheet.iter_cols()
        for cell in column
    }

    # Conditional formatting
    cond_formats: ConditionalFormattingList = sheet.conditional_formatting
    parser = formulas.Parser()
    for cond_fmt in cond_formats:
        for rule in cond_fmt.rules:
            if rule.type != "expression":
                raise NotImplementedError("Not Implemented Condition Type: {:}".format(rule.type))

            # The rule's first formula is compiled once and then evaluated on
            # the string form of each covered cell's value.
            predicate: Callable[[str], bool] = parser.ast("=" + rule.formula[0])[1].compile()
            covered = (
                sheet.cell(row=coords[0], column=coords[1])
                for cell_range in cond_fmt.cells
                for coords in cell_range.cells
            )
            matched: List[Cell] = [cell for cell in covered if predicate(str(cell.value))]

            # Cells satisfying the rule take their values via the rule's
            # differential style.
            for cell in matched:
                styles[cell.coordinate] = [_read_cell_style(prop, cell, rule.dxf) for prop in props]

    return styles
    # }}} function load_xlsx_styles #
# Available Row Properties:
# hidden
# collapsed
# height
#
# Available Column Properties:
# width
# auto_size
# hidden
# collapsed
# min
# max
def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
        -> Dict[Union[int, str], Dict[str, Any]]:
    # function load_rows_or_cols {{{ #
    """
    Reads the requested properties from the row or column dimensions of a
    sheet.

    Args:
        xlsx_file (Workbook): concerned excel book
        sheet_name (str): sheet name
        options (Dict[str, List[str]]): dict like
          {"obj": "row" | "column", "props": list of str}; "obj" is required
          and selects the sheet's `<obj>_dimensions` holder, "props" lists
          the properties to read and defaults to none

    Returns:
        Dict[Union[int, str], Dict[str, Any]]: dict mapping each key of the
          dimension holder to a {property name: value} dict
    """

    sheet: Worksheet = xlsx_file[sheet_name]
    dimensions: DimensionHolder = getattr(sheet, "{:}_dimensions".format(options["obj"]))
    # Duplicated property names collapse into a single entry.
    wanted: Set[str] = set(options.get("props", []))

    return {
        index: {name: getattr(dims, name) for name in wanted}
        for index, dims in dimensions.items()
    }
    # }}} function load_rows_or_cols #
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
return all(k in item and item[k] == val for k, val in pattern.items())

View File

@@ -10,6 +10,8 @@ import imagehash
import librosa
import numpy as np
from PIL import Image
from fastdtw import fastdtw
from scipy.spatial.distance import cosine
from skimage.metrics import structural_similarity as ssim
logger = logging.getLogger("desktopenv.metrics.vlc")
@@ -111,11 +113,10 @@ def compare_images(image1_path, image2_path):
return similarity_index
def compare_audios(audio_path_1, audio_path_2, max_distance=1000):
def compare_audios(audio_path_1, audio_path_2):
"""
Compare two audio files and return a similarity score in the range [0, 1].
audio_path_1, audio_path_2: paths to the audio files to compare
max_distance: an empirically determined maximum expected DTW distance
"""
# Example Usage:
# similarity = compare_audios_simple('path_to_audio1.mp3', 'path_to_audio2.mp3')
@@ -125,21 +126,31 @@ def compare_audios(audio_path_1, audio_path_2, max_distance=1000):
if not audio_path_1 or not audio_path_2:
return 0
# Load the audio files and extract MFCC features
y1, sr1 = librosa.load(audio_path_1)
y2, sr2 = librosa.load(audio_path_2)
# Extract MFCC features
mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
y2, sr2 = librosa.load(audio_path_2)
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
# Compute Dynamic Time Warping distance
distance, path = librosa.sequence.dtw(mfcc1.T, mfcc2.T)
# Normalize the MFCC features
mfcc1 = librosa.util.normalize(mfcc1, axis=1)
mfcc2 = librosa.util.normalize(mfcc2, axis=1)
# Normalize distance to get a similarity score
normalized_distance = np.mean(distance) / max_distance
similarity_score = 1 - min(normalized_distance, 1) # Ensure the score is within [0, 1]
# Define a lambda function to compute cosine distance
dist_func = lambda x, y: cosine(x, y)
return similarity_score
# Use the DTW algorithm to find the best alignment path
distance, path = fastdtw(mfcc1.T, mfcc2.T, dist=dist_func)
# Calculate the similarity score, here we use 1/(1+distance) to convert distance to a similarity score
similarity = 1 / (1 + distance)
return similarity
def compare_audios_by_dl_model(audio_path_1, audio_path_2):
    """Placeholder for a model-based audio comparison; not implemented yet.

    Both arguments are currently ignored and the function returns None.
    """
    pass
def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5):