diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 3eceed5..706f3ba 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -3,8 +3,10 @@ import os import re import xml.etree.ElementTree as ET import zipfile +from io import BytesIO from typing import List, Dict, Any +from PIL import Image from docx import Document from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT from docx.shared import RGBColor @@ -23,6 +25,9 @@ def find_default_font(config_file_path, rules): default_font = None expected_font = rules["font_name"] + if not config_file_path: + return 0 + try: tree = ET.parse(config_file_path) root = tree.getroot() @@ -42,6 +47,9 @@ def find_default_font(config_file_path, rules): def contains_page_break(docx_file): + if not docx_file: + return 0 + doc = Document(docx_file) namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'} @@ -62,6 +70,9 @@ def compare_docx_files(file1, file2, **options): ignore_order = options.get('ignore_order', False) content_only = options.get('content_only', False) + if not file1 or not file2: + return 0 + def get_paragraph_texts_odt(document): paragraphs = document.getElementsByType(P) paragraph_texts = [] @@ -139,6 +150,9 @@ def compare_docx_files(file1, file2, **options): def compare_init_lines(file1, file2): + if not file1 or not file2: + return 0 + doc1 = Document(file1) doc2 = Document(file2) @@ -156,6 +170,9 @@ def compare_init_lines(file1, file2): def compare_docx_tables(docx_file1, docx_file2): + if not docx_file1 or not docx_file2: + return 0 + doc1 = Document(docx_file1) doc2 = Document(docx_file2) @@ -181,11 +198,10 @@ def compare_docx_tables(docx_file1, docx_file2): return 1 -from io import BytesIO -from PIL import Image - - def compare_docx_images(docx_file1, docx_file2): + if not docx_file1 or not docx_file2: + return 0 + doc1 = Document(docx_file1) doc2 = Document(docx_file2) @@ -211,6 +227,9 @@ import pytesseract def compare_image_text(image_path, rule): + if not image_path: + return 0 + img = Image.open(image_path) img_text = pytesseract.image_to_string(img) if rule['type'] == 'text': @@ -220,6 +239,9 @@ def compare_image_text(image_path, rule): def compare_line_spacing(docx_file1, docx_file2): + if not docx_file1 or not docx_file2: + return 0 + if not compare_docx_files(docx_file1, docx_file2): return 0 doc1 = Document(docx_file1) @@ -241,6 +263,9 @@ def compare_line_spacing(docx_file1, docx_file2): def compare_insert_equation(docx_file1, docx_file2): + if not docx_file1 or not docx_file2: + return 0 + if not compare_docx_files(docx_file1, docx_file2): return 0 @@ -256,6 +281,9 @@ def compare_insert_equation(docx_file1, docx_file2): def compare_font_names(docx_file, rules: List[Dict[str, Any]]): + if not docx_file: + return 0 + doc = Document(docx_file) expected_font = rules["font_name"] @@ -268,6 +296,9 @@ def compare_font_names(docx_file, rules: List[Dict[str, Any]]): def compare_subscript_contains(docx_file1, docx_file2): + if not docx_file1 or not docx_file2: + return 0 + doc1 = Document(docx_file1) doc2 = Document(docx_file2) @@ -280,6 +311,9 @@ def compare_subscript_contains(docx_file1, docx_file2): def has_page_numbers_in_footers(docx_file): + if not docx_file: + return 0 + doc = Document(docx_file) for section in doc.sections: @@ -294,6 +328,9 @@ def has_page_numbers_in_footers(docx_file): def is_first_line_centered(docx_file): + if not docx_file: + return 0 + doc = Document(docx_file) first_paragraph = doc.paragraphs[0] @@ -302,11 +339,16 @@ def is_first_line_centered(docx_file): def check_file_exists(directory, filename): + if not directory or not filename: + return 0 file_path = os.path.join(directory, filename) return 1 if os.path.isfile(file_path) else 0 def check_tabstops(docx_file1, docx_file2, **kwargs) -> float: + if not docx_file1 or not docx_file2: + return .0 + doc1: Document = Document(docx_file1) doc2: Document = Document(docx_file2) para1 = [p for p in doc1.paragraphs if p.text.strip()] @@ -342,6 +384,9 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float: def compare_contains_image(docx_file1, docx_file2): + if not docx_file1 or not docx_file2: + return 0 + doc1 = Document(docx_file1) doc2 = Document(docx_file2) @@ -354,6 +399,9 @@ def compare_contains_image(docx_file1, docx_file2): def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs): + if not file_path1 or not file_path2: + return 0 + if not compare_docx_files(file_path1, file_path2): return 0 document = Document(file_path1) @@ -388,6 +436,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs): def check_highlighted_words(file_path1, file_path2): + if not file_path1 or not file_path2: + return 0 + if not compare_docx_files(file_path1, file_path2): return 0 @@ -410,6 +461,9 @@ def check_highlighted_words(file_path1, file_path2): def evaluate_strike_through_last_paragraph(file_path1, file_path2): + if not file_path1 or not file_path2: + return 0 + if not compare_docx_files(file_path1, file_path2): return 0 document = Document(file_path1) @@ -426,6 +480,9 @@ def evaluate_strike_through_last_paragraph(file_path1, file_path2): def evaluate_conversion(file_path): + if not file_path: + return 0 + document = Document(file_path) for table in document.tables: @@ -445,6 +502,9 @@ def evaluate_conversion(file_path): def evaluate_spacing(file_path): + if not file_path: + return 0 + document = Document(file_path) # Check line spacing for introduction, body, and conclusion @@ -458,6 +518,9 @@ def evaluate_spacing(file_path): def check_italic_font_size_14(path1, path2): + if not path1 or not path2: + return 0 + if not compare_docx_files(path1, path2): return 0 document = Document(path1) @@ -471,6 +534,9 @@ def check_italic_font_size_14(path1, path2): def evaluate_alignment(docx_path): + if not docx_path: + return 0 + # Load the document doc = Document(docx_path) @@ -500,6 +566,9 @@ def evaluate_alignment(docx_path): def get_unique_train_ids(initial_file): # fixed standard + if not initial_file: + return set(), 0 + doc = Document(initial_file) train_ids = set() processed_lines = 0 @@ -516,6 +585,9 @@ def get_unique_train_ids(initial_file): # fixed standard def check_no_duplicates(initial_file, processed_file): + if not initial_file or not processed_file: + return 0 + # Open the document train_ids_ini, ini_lines = get_unique_train_ids(initial_file) doc_processed = Document(processed_file) @@ -543,6 +615,9 @@ def check_no_duplicates(initial_file, processed_file): def compare_docx_lines(file1, file2): + if not file1 or not file2: + return 0 + # Read the text of the document, line by line doc1 = Document(file1) doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()] @@ -562,6 +637,9 @@ def compare_docx_lines(file1, file2): def compare_docx_files_and_ignore_new_lines(file1, file2, **options): ignore_blanks = options.get('ignore_blanks', True) + if not file1 or not file2: + return 0 + # Determine file types and load documents if file1.endswith('.docx') and file2.endswith('.docx'): doc1 = Document(file1) @@ -594,6 +672,9 @@ def compare_docx_files_and_ignore_new_lines(file1, file2, **options): # Docx file saved in the ubuntu cannot use this function to compare highlight, don't know why, deprecated def compare_highlighted_text(file1, file2): + if not file1 or not file2: + return 0 + def extract_highlighted_text(file_path): highlighted_texts = [] @@ -631,6 +712,9 @@ def compare_highlighted_text(file1, file2): def compare_references(file1, file2, **options): + if not file1 or not file2: + return 0 + reference_indicator = options.get('reference_indicator', 'References') reference_base_result = options.get('reference_base_result', 0.5) @@ -675,48 +759,3 @@ def compare_references(file1, file2, **options): return (result - reference_base_result) / (1 - reference_base_result) else: return 0 - - -def compare_answer(file1, file2, **options): - """This is a specific function to compare the """ - # Determine file types and load documents - if file1.endswith('.docx') and file2.endswith('.docx'): - doc1 = Document(file1) - doc2 = Document(file2) - doc1_paragraphs = [p.text for p in doc1.paragraphs] - doc2_paragraphs = [p.text for p in doc2.paragraphs] - else: - # Unsupported file types or mismatch - print("Unsupported file types or mismatch between file types.") - return 0 - - # Find the references section in the paragraphs, find the idx of the last reference_indicator in the paragraph list - ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1 - ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1 - - if ref1_idx == -1 and ref2_idx == -1: - return 1 - - if ref1_idx == -1 or ref2_idx == -1: - return 0 - - # split the reference section into reference items, and remove the empty string items - ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()] - ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()] - - # Compare the references - - if len(ref1) != len(ref2): - return 0 - - total_similarity = 0 - for r1, r2 in zip(ref1, ref2): - # fuzzy match the references - similarity = fuzz.ratio(r1, r2) / 100.0 - total_similarity += similarity - - result = total_similarity / len(ref1) - if result >= reference_base_result: - return (result - reference_base_result) / (1 - reference_base_result) - else: - return 0 diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index c68fb90..02ad5bb 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -1,29 +1,30 @@ import csv +import datetime +import difflib import functools import json -import yaml +import logging import operator import re -import pdfplumber import sqlite3 from numbers import Number from typing import Callable, Any, Union from typing import Dict, List, Pattern -import datetime -import pandas as pd + import lxml.etree +import pandas as pd +import pdfplumber +import yaml +from docx import Document from lxml.cssselect import CSSSelector from lxml.etree import _Element from rapidfuzz import fuzz -from docx import Document -import difflib from .utils import _match_record, _match_value_to_rule -import logging - logger = logging.getLogger("desktopenv.metric.general") + def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float: if result is None: return 0. @@ -71,6 +72,7 @@ def is_in_list(result, rules) -> float: else: return 0. + def diff_text_file(result: str, expect: str) -> float: if result is None: return 0. @@ -81,6 +83,7 @@ def diff_text_file(result: str, expect: str) -> float: expected_lines: List[str] = f.read().splitlines() return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio() + def fuzzy_match(result, rules) -> float: expect = rules["expected"] @@ -88,7 +91,7 @@ def fuzzy_match(result, rules) -> float: def fuzzy_place_math(result_file_path, rules) -> float: - expect = rules["expected"] # a list of possible answers + expect = rules["expected"] # a list of possible answers # read list.docx, and get all texts out, overlook blank lines, remove blanks before and after each line doc = Document(result_file_path) words_list = [] @@ -102,6 +105,7 @@ def fuzzy_place_math(result_file_path, rules) -> float: return 0 return 1 + def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float: """ Args: @@ -209,10 +213,10 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float: return 0. if "text" in r: - match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \ - else (lambda a, b: fuzz.ratio(a, b) / 100.) - , r["text"] - ) + match_func: Callable[[str], Number] = functools.partial(operator.eq if r["exact"] \ + else (lambda a, b: fuzz.ratio(a, b) / 100.) + , r["text"] + ) match_score: Number = 0 for elm in elements: match_score = max(match_score, match_func(elm.text or None)) @@ -285,7 +289,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str return float(metric) -def check_direct_json_object(result, rules)->float: +def check_direct_json_object(result, rules) -> float: """ One of the most commonly used function to evalute. Compare two json objects directly. @@ -320,7 +324,11 @@ def check_direct_json_object(result, rules)->float: return 0. return 1.0 + def compare_time_in_speedtest_results(speedtest_result_path, time_diff): + if not speedtest_result_path: + return 0 + # open the speedtest results file(csv) date_col = None with open(speedtest_result_path, 'r') as f: @@ -333,12 +341,17 @@ def compare_time_in_speedtest_results(speedtest_result_path, time_diff): for date in reader[date_col]: date_time = date[-5:] # compare the date time with the current date time, if time diff less than time_diff para, then return true - if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time, '%H:%M')).total_seconds()) / 60 < int(time_diff): - return False - return True + if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time, + '%H:%M')).total_seconds()) / 60 < int( + time_diff): + return 0 + return 1 def is_included_all_json_objects(gold_file_path, result_file_path): + if not gold_file_path or not result_file_path: + return 0 + print("gold_file_path: ") print(gold_file_path) print("result_file_path: ") @@ -350,8 +363,8 @@ def is_included_all_json_objects(gold_file_path, result_file_path): result_json = json.load(fr) for key in gold_json.keys(): if key not in result_json.keys() or gold_json[key] != result_json[key]: - return False - return True + return 0 + return 1 def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path): @@ -373,31 +386,32 @@ def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path): if len(false_list) > 0: print("false_list: ") print(false_list) - return False + return 0 else: - return True + return 1 def file_contains(file_path, config): # file_path ends with .txt - if not file_path : - return False + if not file_path: + return 1 with open(file_path, 'r') as f: file_text = f.read() for text in config["expected"]: if text not in file_text: - return False - return True + return 0 + return 1 + def check_csv_line_number(file_path, line_number): # check file_path suffix if not file_path.endswith('.csv'): - return False + return 0 # check line number with open(file_path, 'r') as f: reader = csv.reader(f) line_count = sum(1 for row in reader) - return True if line_count == int(line_number["expected"]) else False + return 1 if line_count == int(line_number["expected"]) else 0 def compare_terminal_and_txt(txt_file_path, terminal_output): @@ -405,7 +419,7 @@ def compare_terminal_and_txt(txt_file_path, terminal_output): with open(txt_file_path, 'r') as f: txt_file_content = f.read() # compare terminal output with txt file content - return True if terminal_output == txt_file_content else False + return 1 if terminal_output == txt_file_content else 0 def compare_python_pure_text(py_file_path, gold_file_path): @@ -414,13 +428,18 @@ def compare_python_pure_text(py_file_path, gold_file_path): print(py_file_path) print("gold_file_path: ") print(gold_file_path) + # gold_file_path = gold_file_path.replace('.txt', '.py') def remove_whitespace(text): return ''.join(text.split()) + with open(py_file_path, 'r') as file1: content1 = file1.read() with open(gold_file_path, 'r') as file2: content2 = file2.read() content1_no_whitespace = remove_whitespace(content1) content2_no_whitespace = remove_whitespace(content2) - return content1_no_whitespace == content2_no_whitespace \ No newline at end of file + if content1_no_whitespace == content2_no_whitespace: + return 1 + else: + return 0