From 1e0a78a453fe7a573f0827ecc02b5df7283c9a40 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 9 Mar 2024 00:30:28 +0800
Subject: [PATCH] Add none file handling for general

---
Reviewer notes (this section sits after the "---" separator, where
format-patch mails carry free-form notes that are not part of the commit
message):

* Line structure restored. The patch had been flattened onto four physical
  lines, so hunk headers and +/-/context markers were inline and the patch
  could not be parsed. Each hunk below matches the line counts in its
  "@@ -a,b +c,d @@" header, and the totals match the diffstat (48
  insertions, 29 deletions).
* Leading whitespace was rebuilt from Python block structure and could not
  be read from the flattened text. Run `git apply --check` before use and
  fall back to `git apply --ignore-whitespace` if a hunk is rejected. The
  least certain spots are the `return 0` context line in the
  fuzzy_place_math hunk and the removed continuation lines in the
  check_accessibility_tree hunk.
* Fix: the new guard in file_contains() returned 1 (success) when file_path
  was empty/None. The line it replaces returned False and every other guard
  added by this patch returns 0, so it now returns 0. The post-image blob id
  on the "index" line therefore no longer describes the result exactly.

 desktop_env/evaluators/metrics/general.py | 77 ++++++++++++++---------
 1 file changed, 48 insertions(+), 29 deletions(-)

diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py
index c68fb90..02ad5bb 100644
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -1,29 +1,30 @@
 import csv
+import datetime
+import difflib
 import functools
 import json
-import yaml
+import logging
 import operator
 import re
-import pdfplumber
 import sqlite3
 from numbers import Number
 from typing import Callable, Any, Union
 from typing import Dict, List, Pattern
-import datetime
-import pandas as pd
+
 import lxml.etree
+import pandas as pd
+import pdfplumber
+import yaml
+from docx import Document
 from lxml.cssselect import CSSSelector
 from lxml.etree import _Element
 from rapidfuzz import fuzz
-from docx import Document
-import difflib
 
 from .utils import _match_record, _match_value_to_rule
 
-import logging
-
 logger = logging.getLogger("desktopenv.metric.general")
 
+
 def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
     if result is None:
         return 0.
@@ -71,6 +72,7 @@ def is_in_list(result, rules) -> float:
     else:
         return 0.
 
+
 def diff_text_file(result: str, expect: str) -> float:
     if result is None:
         return 0.
@@ -81,6 +83,7 @@ def diff_text_file(result: str, expect: str) -> float:
         expected_lines: List[str] = f.read().splitlines()
     return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio()
 
+
 def fuzzy_match(result, rules) -> float:
     expect = rules["expected"]
 
@@ -88,7 +91,7 @@ def fuzzy_match(result, rules) -> float:
 
 
 def fuzzy_place_math(result_file_path, rules) -> float:
-    expect = rules["expected"] # a list of possible answers
+    expect = rules["expected"]  # a list of possible answers
     # read list.docx, and get all texts out, overlook blank lines, remove blanks before and after each line
     doc = Document(result_file_path)
     words_list = []
@@ -102,6 +105,7 @@ def fuzzy_place_math(result_file_path, rules) -> float:
             return 0
     return 1
 
+
 def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
     """
     Args:
@@ -209,10 +213,10 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
             return 0.
 
         if "text" in r:
-            match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
-                                                                     else (lambda a, b: fuzz.ratio(a, b) / 100.)
-                                                                   , r["text"]
-                                                                   )
+            match_func: Callable[[str], Number] = functools.partial(operator.eq if r["exact"] \
+                                                                        else (lambda a, b: fuzz.ratio(a, b) / 100.)
+                                                                    , r["text"]
+                                                                    )
             match_score: Number = 0
             for elm in elements:
                 match_score = max(match_score, match_func(elm.text or None))
@@ -285,7 +289,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
     return float(metric)
 
 
-def check_direct_json_object(result, rules)->float:
+def check_direct_json_object(result, rules) -> float:
     """
     One of the most commonly used function to evalute.
     Compare two json objects directly.
@@ -320,7 +324,11 @@ def check_direct_json_object(result, rules)->float:
             return 0.
     return 1.0
 
+
 def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
+    if not speedtest_result_path:
+        return 0
+
     # open the speedtest results file(csv)
     date_col = None
     with open(speedtest_result_path, 'r') as f:
@@ -333,12 +341,17 @@ def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
         for date in reader[date_col]:
             date_time = date[-5:]
             # compare the date time with the current date time, if time diff less than time_diff para, then return true
-            if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time, '%H:%M')).total_seconds()) / 60 < int(time_diff):
-                return False
-        return True
+            if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time,
+                                                                                                    '%H:%M')).total_seconds()) / 60 < int(
+                    time_diff):
+                return 0
+        return 1
 
 
 def is_included_all_json_objects(gold_file_path, result_file_path):
+    if not gold_file_path or not result_file_path:
+        return 0
+
     print("gold_file_path: ")
     print(gold_file_path)
     print("result_file_path: ")
@@ -350,8 +363,8 @@ def is_included_all_json_objects(gold_file_path, result_file_path):
         result_json = json.load(fr)
     for key in gold_json.keys():
         if key not in result_json.keys() or gold_json[key] != result_json[key]:
-            return False
-    return True
+            return 0
+    return 1
 
 
 def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
@@ -373,31 +386,32 @@ def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
     if len(false_list) > 0:
         print("false_list: ")
         print(false_list)
-        return False
+        return 0
     else:
-        return True
+        return 1
 
 
 def file_contains(file_path, config):
     # file_path ends with .txt
-    if not file_path :
-        return False
+    if not file_path:
+        return 0
     with open(file_path, 'r') as f:
         file_text = f.read()
     for text in config["expected"]:
         if text not in file_text:
-            return False
-    return True
+            return 0
+    return 1
 
+
 def check_csv_line_number(file_path, line_number):
     # check file_path suffix
     if not file_path.endswith('.csv'):
-        return False
+        return 0
     # check line number
     with open(file_path, 'r') as f:
         reader = csv.reader(f)
         line_count = sum(1 for row in reader)
-    return True if line_count == int(line_number["expected"]) else False
+    return 1 if line_count == int(line_number["expected"]) else 0
 
 
 def compare_terminal_and_txt(txt_file_path, terminal_output):
@@ -405,7 +419,7 @@ def compare_terminal_and_txt(txt_file_path, terminal_output):
     with open(txt_file_path, 'r') as f:
         txt_file_content = f.read()
     # compare terminal output with txt file content
-    return True if terminal_output == txt_file_content else False
+    return 1 if terminal_output == txt_file_content else 0
 
 
 def compare_python_pure_text(py_file_path, gold_file_path):
@@ -414,13 +428,18 @@ def compare_python_pure_text(py_file_path, gold_file_path):
     print(py_file_path)
     print("gold_file_path: ")
     print(gold_file_path)
+
     # gold_file_path = gold_file_path.replace('.txt', '.py')
     def remove_whitespace(text):
         return ''.join(text.split())
+
     with open(py_file_path, 'r') as file1:
         content1 = file1.read()
     with open(gold_file_path, 'r') as file2:
         content2 = file2.read()
     content1_no_whitespace = remove_whitespace(content1)
     content2_no_whitespace = remove_whitespace(content2)
-    return content1_no_whitespace == content2_no_whitespace
\ No newline at end of file
+    if content1_no_whitespace == content2_no_whitespace:
+        return 1
+    else:
+        return 0