Add None-file handling to the general metrics

2024-03-09 00:30:28 +08:00
parent 4de0eff703
commit 1e0a78a453
1 changed files with 48 additions and 29 deletions
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -1,29 +1,30 @@
 import csv
 import datetime
 import difflib
 import functools
 import json
-import yaml
+import logging
 import operator
 import re
 import pdfplumber
 import sqlite3
 from numbers import Number
 from typing import Callable, Any, Union
 from typing import Dict, List, Pattern
-import datetime
+
 import pandas as pd
 import lxml.etree
 import pandas as pd
 import pdfplumber
 import yaml
 from docx import Document
 from lxml.cssselect import CSSSelector
 from lxml.etree import _Element
 from rapidfuzz import fuzz
 from docx import Document
 import difflib
 from .utils import _match_record, _match_value_to_rule
 import logging
 logger = logging.getLogger("desktopenv.metric.general")
 def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
    if result is None:
        return 0.
@@ -71,6 +72,7 @@ def is_in_list(result, rules) -> float:
    else:
        return 0.
 def diff_text_file(result: str, expect: str) -> float:
    if result is None:
        return 0.
@@ -81,6 +83,7 @@ def diff_text_file(result: str, expect: str) -> float:
        expected_lines: List[str] = f.read().splitlines()
    return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio()
 def fuzzy_match(result, rules) -> float:
    expect = rules["expected"]
@@ -88,7 +91,7 @@ def fuzzy_match(result, rules) -> float:
 def fuzzy_place_math(result_file_path, rules) -> float:
-    expect = rules["expected"] # a list of possible answers
+    expect = rules["expected"]  # a list of possible answers
    # read list.docx and collect all of its text, skipping blank lines and stripping leading/trailing whitespace from each line
    doc = Document(result_file_path)
    words_list = []
@@ -102,6 +105,7 @@ def fuzzy_place_math(result_file_path, rules) -> float:
            return 0
    return 1
 def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
    """
    Args:
@@ -209,10 +213,10 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
            return 0.
        if "text" in r:
-            match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
+            match_func: Callable[[str], Number] = functools.partial(operator.eq if r["exact"] \
-                                                                               else (lambda a, b: fuzz.ratio(a, b) / 100.)
+                                                                        else (lambda a, b: fuzz.ratio(a, b) / 100.)
-                                                                   , r["text"]
+                                                                    , r["text"]
-                                                                   )
+                                                                    )
            match_score: Number = 0
            for elm in elements:
                match_score = max(match_score, match_func(elm.text or None))
@@ -285,7 +289,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
    return float(metric)
-def check_direct_json_object(result, rules)->float:
+def check_direct_json_object(result, rules) -> float:
    """
    One of the most commonly used functions for evaluation.
    Compare two json objects directly.
@@ -320,7 +324,11 @@ def check_direct_json_object(result, rules)->float:
                return 0.
        return 1.0
 def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    if not speedtest_result_path:
        return 0
    # open the speedtest results file(csv)
    date_col = None
    with open(speedtest_result_path, 'r') as f:
@@ -333,12 +341,17 @@ def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
        for date in reader[date_col]:
            date_time = date[-5:]
            # compare each recorded time with the current time; if any differs by time_diff minutes or more, return 0 (otherwise 1 is returned after the loop)
-            if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time, '%H:%M')).total_seconds()) / 60 < int(time_diff):
+            if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time,
-                return False
+                                                                                                    '%H:%M')).total_seconds()) / 60 < int(
-        return True
+                    time_diff):
                return 0
        return 1
 def is_included_all_json_objects(gold_file_path, result_file_path):
    if not gold_file_path or not result_file_path:
        return 0
    print("gold_file_path: ")
    print(gold_file_path)
    print("result_file_path: ")
@@ -350,8 +363,8 @@ def is_included_all_json_objects(gold_file_path, result_file_path):
        result_json = json.load(fr)
    for key in gold_json.keys():
        if key not in result_json.keys() or gold_json[key] != result_json[key]:
-            return False
+            return 0
-    return True
+    return 1
 def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
@@ -373,31 +386,32 @@ def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
    if len(false_list) > 0:
        print("false_list: ")
        print(false_list)
-        return False
+        return 0
    else:
-        return True
+        return 1
 def file_contains(file_path, config):
    # file_path ends with .txt
-    if not file_path :
+    if not file_path:
-        return False
+        return 0
    with open(file_path, 'r') as f:
        file_text = f.read()
    for text in config["expected"]:
        if text not in file_text:
-            return False
+            return 0
-    return True
+    return 1
 def check_csv_line_number(file_path, line_number):
    # check file_path suffix
    if not file_path.endswith('.csv'):
-        return False
+        return 0
    # check line number
    with open(file_path, 'r') as f:
        reader = csv.reader(f)
        line_count = sum(1 for row in reader)
-    return True if line_count == int(line_number["expected"]) else False
+    return 1 if line_count == int(line_number["expected"]) else 0
 def compare_terminal_and_txt(txt_file_path, terminal_output):
@@ -405,7 +419,7 @@ def compare_terminal_and_txt(txt_file_path, terminal_output):
    with open(txt_file_path, 'r') as f:
        txt_file_content = f.read()
    # compare terminal output with txt file content
-    return True if terminal_output == txt_file_content else False
+    return 1 if terminal_output == txt_file_content else 0
 def compare_python_pure_text(py_file_path, gold_file_path):
@@ -414,13 +428,18 @@ def compare_python_pure_text(py_file_path, gold_file_path):
    print(py_file_path)
    print("gold_file_path: ")
    print(gold_file_path)
    # gold_file_path = gold_file_path.replace('.txt', '.py')
    def remove_whitespace(text):
        return ''.join(text.split())
    with open(py_file_path, 'r') as file1:
        content1 = file1.read()
    with open(gold_file_path, 'r') as file2:
        content2 = file2.read()
    content1_no_whitespace = remove_whitespace(content1)
    content2_no_whitespace = remove_whitespace(content2)
-    return content1_no_whitespace == content2_no_whitespace
+    if content1_no_whitespace == content2_no_whitespace:
        return 1
    else:
        return 0