# sci-gui-agent-benchmark/desktop_env/evaluators/metrics/others.py

import zipfile
import os.path
import os

import lxml.html
from lxml.html import HtmlElement
from typing import List, Dict
from typing import Union, TypeVar
from mutagen.easyid3 import EasyID3

from .general import diff_text_file
from .utils import _match_value_to_rule

import logging

logger = logging.getLogger("desktopenv.metric.others")

def _write_without_marker(archive: zipfile.ZipFile, member: str, out_path: str, marker: str) -> None:
    """Copy text member `member` of `archive` to `out_path`, dropping lines that contain `marker`."""
    with archive.open(member) as in_f, open(out_path, "w") as out_f:
        for line in in_f.read().decode().splitlines():
            if marker not in line:
                out_f.write(line + "\n")


def process_epub(filename: str) -> List[str]:
    """Unpack the comparable parts of an EPUB into ``<filename>.dir``.

    The following members are written below ``<filename>.dir``:

    * ``toc.ncx`` without the lines containing ``navPoint``;
    * ``content.opf`` without the lines containing ``dc:identifier``;
    * every member whose name ends in ``.html``, with line breaks removed
      and then re-serialised by lxml with ``pretty_print=True``.

    Args:
        filename: path of the EPUB (zip) file.

    Returns:
        The sorted list of paths that were written, or an empty list when
        ``filename`` is not a valid zip archive.

    Raises:
        KeyError: if the archive has no root-level ``toc.ncx`` or
            ``content.opf`` member (raised by ``ZipFile.open``).
    """
    base_dir: str = filename + ".dir"
    os.makedirs(base_dir, exist_ok=True)
    base_root: str = os.path.abspath(base_dir)
    file_list: List[str] = []

    # NOTE(review): output files keep the platform default text encoding, as
    # before; confirm what diff_text_file reads with before pinning one here.
    try:
        with zipfile.ZipFile(filename, "r") as z_f:
            manifest_filters = (
                ("toc.ncx", "navPoint"),
                ("content.opf", "dc:identifier"),
            )
            for member, marker in manifest_filters:
                out_path = os.path.join(base_dir, member)
                _write_without_marker(z_f, member, out_path, marker)
                file_list.append(out_path)

            for f_n in z_f.namelist():
                if not f_n.endswith(".html"):
                    continue
                out_path = os.path.join(base_dir, f_n)
                # Member names come from the archive; refuse anything that
                # would land outside the extraction directory ("../", absolute).
                if os.path.commonpath([base_root, os.path.abspath(out_path)]) != base_root:
                    logger.warning("%s: skipping member outside %s: %s", filename, base_dir, f_n)
                    continue
                # HTML members may live in sub-directories of the archive,
                # so the parent directory has to exist before open().
                os.makedirs(os.path.dirname(out_path), exist_ok=True)
                with z_f.open(f_n) as in_f, open(out_path, "w") as out_f:
                    flattened = in_f.read().decode().replace("\n", "").replace("\r", "")
                    # Bytes (not str) are handed to lxml, as in the original.
                    html: HtmlElement = lxml.html.fromstring(flattened.encode())
                    out_f.write(lxml.html.tostring(html, pretty_print=True, encoding="unicode"))
                file_list.append(out_path)
        logger.debug("%s: %s", filename, file_list)
        return sorted(file_list)
    except zipfile.BadZipFile:
        return []

def compare_epub(result: str, expected: str) -> float:
    """Score how closely the EPUB at `result` matches the one at `expected`.

    Both archives are unpacked with ``process_epub``; the extracted files are
    paired in sorted order and the per-pair ``diff_text_file`` scores are
    multiplied together.

    Args:
        result: path of the produced EPUB, or ``None`` when there is none.
        expected: path of the reference EPUB.

    Returns:
        The product of the per-file scores. ``0.`` is returned when `result`
        is ``None``, when the result archive yields no files (for example
        because it is not a valid zip), or when the two archives yield a
        different number of files.
    """
    if result is None:
        return 0.
    result_files: List[str] = process_epub(result)
    expected_files: List[str] = process_epub(expected)

    # zip() over an empty or shorter list would silently skip files and let an
    # unreadable or incomplete result keep the perfect initial score.
    if not result_files or len(result_files) != len(expected_files):
        logger.debug(
            "file count mismatch: %d (result) vs %d (expected)",
            len(result_files),
            len(expected_files),
        )
        return 0.

    metric: float = 1.
    for f1, f2 in zip(result_files, expected_files):
        current_metric: float = diff_text_file(f1, f2)
        logger.debug("%s vs %s: %f", f1, f2, current_metric)
        metric *= current_metric
    return metric

# The string passed to TypeVar must match the name it is assigned to.
V = TypeVar("V")


def check_mp3_meta(result: str, meta: Dict[str, Dict[str, Union[str, V]]]) -> float:
    """Check the ID3 tags of an MP3 file against a set of rules.

    Args:
        result: path of the MP3 file, or ``None`` when there is none.
        meta: maps a tag key (e.g. ``"title"``) to the rule dict handed to
            ``_match_value_to_rule`` (e.g. ``{"method": "eq", "ref": "..."}``).

    Returns:
        ``0.`` when `result` is ``None``; otherwise the combined outcome of
        all rules converted with ``float()``.
    """
    if result is None:
        return 0.

    id3_dict = EasyID3(result)
    metric = True
    for key, rule in meta.items():
        value = id3_dict.get(key, "")
        if isinstance(value, list):
            # List-valued tags are compared as one comma-joined string.
            value = ",".join(value)
        logger.debug("%s.%s: %s", result, key, value)
        # `and` short-circuits: once a rule has failed, later rules are only
        # logged, not evaluated.
        metric = metric and _match_value_to_rule(value, rule)
    return float(metric)

if __name__ == "__main__":
    # Manual smoke test: configure verbose logging to files under "logs/" and
    # to stdout, then run check_mp3_meta on one sample file and print the score.
    import datetime
    import sys

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

    # FileHandler opens its file immediately, so the directory must exist.
    os.makedirs("logs", exist_ok=True)

    file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
    debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
    stdout_handler = logging.StreamHandler(sys.stdout)
    sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))

    formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")

    # Same level, formatter and registration order as the handlers above.
    handler_levels = (
        (file_handler, logging.INFO),
        (debug_handler, logging.DEBUG),
        (stdout_handler, logging.INFO),
        (sdebug_handler, logging.DEBUG),
    )
    for handler, level in handler_levels:
        handler.setLevel(level)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    metric = check_mp3_meta(
        "snapshots/test/cache/3f05f3b9-29ba-4b6b-95aa-2204697ffc06/Cheng Xiang - Missing You - gt.mp3",
        {
            "title": {"method": "eq", "ref": "Missing You"},
            "artist": {"method": "eq", "ref": "Cheng Xiang"},
        },
    )
    print(metric)