import logging
import os
import os.path
import zipfile
from typing import List, Dict, Optional
from typing import Union, TypeVar

import lxml.html
from lxml.html import HtmlElement
from mutagen.easyid3 import EasyID3

from .general import diff_text_file
from .utils import _match_value_to_rule

logger = logging.getLogger("desktopenv.metric.others")


def _extract_without_marker(archive: zipfile.ZipFile, member: str,
                            base_dir: str, marker: str) -> str:
    """Copy a text member of *archive* into *base_dir*, dropping marked lines.

    Every line of the member that contains *marker* is omitted; all other
    lines are written back terminated by a single ``"\\n"``.

    Returns the path of the file that was written.
    Raises ``KeyError`` (from ``ZipFile.open``) if *member* is not in the
    archive.
    """
    target = os.path.join(base_dir, member)
    with archive.open(member) as in_f, open(target, "w") as out_f:
        for line in in_f.read().decode().splitlines():
            if marker not in line:
                out_f.write(line + "\n")
    return target


def _resolve_member_path(base_dir: str, member: str) -> Optional[str]:
    """Return the extraction path for *member*, or ``None`` if it escapes *base_dir*.

    Archive member names are taken from the zip file as-is, so a name that
    is absolute or contains ``..`` could otherwise be written outside the
    extraction directory.
    """
    target = os.path.join(base_dir, member)
    root = os.path.abspath(base_dir)
    if not os.path.abspath(target).startswith(root + os.sep):
        return None
    return target


def process_epub(filename: str) -> List[str]:
    """Unpack the comparable parts of an EPUB into ``<filename>.dir``.

    The following members are extracted and normalised:

    * ``toc.ncx`` with every line containing ``navPoint`` removed,
    * ``content.opf`` with every line containing ``dc:identifier`` removed
      (NOTE(review): presumably these lines differ between otherwise
      equivalent exports - confirm),
    * every member whose name ends in ``.html``, with all CR/LF characters
      stripped, re-parsed by lxml and pretty-printed.

    Returns the sorted list of written file paths, or an empty list when
    *filename* is not a valid zip archive.
    """
    file_list: List[str] = []

    base_dir: str = filename + ".dir"
    os.makedirs(base_dir, exist_ok=True)

    try:
        with zipfile.ZipFile(filename, "r") as z_f:
            file_list.append(
                _extract_without_marker(z_f, "toc.ncx", base_dir, "navPoint")
            )
            file_list.append(
                _extract_without_marker(z_f, "content.opf", base_dir, "dc:identifier")
            )

            for member in z_f.namelist():
                if not member.endswith(".html"):
                    continue

                target = _resolve_member_path(base_dir, member)
                if target is None:
                    logger.warning(
                        "%s: skipping member outside extraction dir: %s",
                        filename, member,
                    )
                    continue

                # Members may live in sub-directories (e.g. "OEBPS/ch1.html");
                # the parent has to exist before the output file is opened.
                os.makedirs(os.path.dirname(target), exist_ok=True)

                with z_f.open(member) as in_f, open(target, "w") as out_f:
                    # Remove all line breaks so that only lxml's pretty
                    # printer decides the layout of the written file.
                    flattened: str = (
                        in_f.read().decode().replace("\n", "").replace("\r", "")
                    )
                    html: HtmlElement = lxml.html.fromstring(flattened.encode())
                    out_f.write(
                        lxml.html.tostring(html, pretty_print=True, encoding="unicode")
                    )
                file_list.append(target)

        logger.debug("%s: %s", filename, file_list)
        return sorted(file_list)
    except zipfile.BadZipFile:
        return []


def compare_epub(result: str, expected: str) -> float:
    """Score how closely the EPUB *result* matches the EPUB *expected*.

    Both archives are unpacked with :func:`process_epub`; the extracted files
    are paired in sorted order and the per-pair ``diff_text_file`` scores are
    multiplied together.

    Returns ``0.`` when *result* is ``None``, when *result* yields no files
    (e.g. it is not a valid zip archive) or when the two archives yield a
    different number of files; otherwise the product of the per-file scores.
    """
    if result is None:
        return 0.

    result_files: List[str] = process_epub(result)
    expected_files: List[str] = process_epub(expected)

    # zip() below would silently truncate to the shorter list, which would
    # give an unreadable result (empty list) the neutral score of 1.
    if not result_files or len(result_files) != len(expected_files):
        logger.debug(
            "%s vs %s: file lists not comparable (%d vs %d files)",
            result, expected, len(result_files), len(expected_files),
        )
        return 0.

    metric: float = 1.
    for f1, f2 in zip(result_files, expected_files):
        current_metric: float = diff_text_file(f1, f2)
        logger.debug("%s vs %s: %f", f1, f2, current_metric)
        metric *= current_metric
    return metric


# The TypeVar name has to match the variable it is assigned to.
V = TypeVar("V")


def check_mp3_meta(result: str, meta: Dict[str, Dict[str, Union[str, V]]]) -> float:
    """Check the ID3 tags of the MP3 file *result* against the rules in *meta*.

    *meta* maps a tag name to a rule that is handed to
    ``_match_value_to_rule`` together with the tag's value, e.g.
    ``{"title": {"method": "eq", "ref": "Missing You"}}``. A tag whose value
    is a list is joined with ``","`` first; a missing tag is checked as the
    empty string.

    Returns ``0.`` when *result* is ``None``; otherwise the conjunction of
    all rule checks converted to ``float``.
    """
    if result is None:
        return 0.

    id3_dict = EasyID3(result)
    metric = True
    for k, r in meta.items():
        value = id3_dict.get(k, "")
        if isinstance(value, list):
            value = ",".join(value)
        logger.debug("%s.%s: %s", result, k, value)
        # `and` short-circuits: once a rule has failed the remaining rules
        # are only logged, not evaluated.
        metric = metric and _match_value_to_rule(value, r)
    return float(metric)


if __name__ == "__main__":
    import datetime
    import sys

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

    # FileHandler does not create missing directories.
    os.makedirs("logs", exist_ok=True)

    file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
    debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
    stdout_handler = logging.StreamHandler(sys.stdout)
    sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))

    formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")

    # Same handler order and levels as before: INFO, DEBUG, INFO, DEBUG.
    for handler, level in (
        (file_handler, logging.INFO),
        (debug_handler, logging.DEBUG),
        (stdout_handler, logging.INFO),
        (sdebug_handler, logging.DEBUG),
    ):
        handler.setLevel(level)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    metric = check_mp3_meta(
        "snapshots/test/cache/3f05f3b9-29ba-4b6b-95aa-2204697ffc06/Cheng Xiang - Missing You - gt.mp3",
        {
            "title": {
                "method": "eq",
                "ref": "Missing You",
            },
            "artist": {
                "method": "eq",
                "ref": "Cheng Xiang",
            },
        },
    )
    print(metric)