Merge branch 'main' of github.com:xlang-ai/DesktopEnv

2024-03-09 14:03:36 +08:00
parent 6ea3dd856f 1e0a78a453
commit d68e7369b7
2 changed files with 136 additions and 78 deletions
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -3,8 +3,10 @@ import os
 import re
 import xml.etree.ElementTree as ET
 import zipfile
 from io import BytesIO
 from typing import List, Dict, Any
 from PIL import Image
 from docx import Document
 from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
 from docx.shared import RGBColor
@@ -23,6 +25,9 @@ def find_default_font(config_file_path, rules):
    default_font = None
    expected_font = rules["font_name"]
    if not config_file_path:
        return 0
    try:
        tree = ET.parse(config_file_path)
        root = tree.getroot()
@@ -42,6 +47,9 @@ def find_default_font(config_file_path, rules):
 def contains_page_break(docx_file):
    if not docx_file:
        return 0
    doc = Document(docx_file)
    namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
@@ -62,6 +70,9 @@ def compare_docx_files(file1, file2, **options):
    ignore_order = options.get('ignore_order', False)
    content_only = options.get('content_only', False)
    if not file1 or not file2:
        return 0
    def get_paragraph_texts_odt(document):
        paragraphs = document.getElementsByType(P)
        paragraph_texts = []
@@ -139,6 +150,9 @@ def compare_docx_files(file1, file2, **options):
 def compare_init_lines(file1, file2):
    if not file1 or not file2:
        return 0
    doc1 = Document(file1)
    doc2 = Document(file2)
@@ -156,6 +170,9 @@ def compare_init_lines(file1, file2):
 def compare_docx_tables(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
@@ -181,11 +198,10 @@ def compare_docx_tables(docx_file1, docx_file2):
    return 1
 from io import BytesIO
 from PIL import Image
 def compare_docx_images(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
@@ -211,6 +227,9 @@ import pytesseract
 def compare_image_text(image_path, rule):
    if not image_path:
        return 0
    img = Image.open(image_path)
    img_text = pytesseract.image_to_string(img)
    if rule['type'] == 'text':
@@ -220,6 +239,9 @@ def compare_image_text(image_path, rule):
 def compare_line_spacing(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0
    if not compare_docx_files(docx_file1, docx_file2):
        return 0
    doc1 = Document(docx_file1)
@@ -241,6 +263,9 @@ def compare_line_spacing(docx_file1, docx_file2):
 def compare_insert_equation(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0
    if not compare_docx_files(docx_file1, docx_file2):
        return 0
@@ -256,6 +281,9 @@ def compare_insert_equation(docx_file1, docx_file2):
 def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
    if not docx_file:
        return 0
    doc = Document(docx_file)
    expected_font = rules["font_name"]
@@ -268,6 +296,9 @@ def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
 def compare_subscript_contains(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
@@ -280,6 +311,9 @@ def compare_subscript_contains(docx_file1, docx_file2):
 def has_page_numbers_in_footers(docx_file):
    if not docx_file:
        return 0
    doc = Document(docx_file)
    for section in doc.sections:
@@ -294,6 +328,9 @@ def has_page_numbers_in_footers(docx_file):
 def is_first_line_centered(docx_file):
    if not docx_file:
        return 0
    doc = Document(docx_file)
    first_paragraph = doc.paragraphs[0]
@@ -302,11 +339,16 @@ def is_first_line_centered(docx_file):
 def check_file_exists(directory, filename):
    if not directory or not filename:
        return 0
    file_path = os.path.join(directory, filename)
    return 1 if os.path.isfile(file_path) else 0
 def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
    if not docx_file1 or not docx_file2:
        return .0
    doc1: Document = Document(docx_file1)
    doc2: Document = Document(docx_file2)
    para1 = [p for p in doc1.paragraphs if p.text.strip()]
@@ -342,6 +384,9 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
 def compare_contains_image(docx_file1, docx_file2):
    if not docx_file1 or not docx_file2:
        return 0
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
@@ -354,6 +399,9 @@ def compare_contains_image(docx_file1, docx_file2):
 def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
    if not file_path1 or not file_path2:
        return 0
    if not compare_docx_files(file_path1, file_path2):
        return 0
    document = Document(file_path1)
@@ -388,6 +436,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
 def check_highlighted_words(file_path1, file_path2):
    if not file_path1 or not file_path2:
        return 0
    if not compare_docx_files(file_path1, file_path2):
        return 0
@@ -410,6 +461,9 @@ def check_highlighted_words(file_path1, file_path2):
 def evaluate_strike_through_last_paragraph(file_path1, file_path2):
    if not file_path1 or not file_path2:
        return 0
    if not compare_docx_files(file_path1, file_path2):
        return 0
    document = Document(file_path1)
@@ -426,6 +480,9 @@ def evaluate_strike_through_last_paragraph(file_path1, file_path2):
 def evaluate_conversion(file_path):
    if not file_path:
        return 0
    document = Document(file_path)
    for table in document.tables:
@@ -445,6 +502,9 @@ def evaluate_conversion(file_path):
 def evaluate_spacing(file_path):
    if not file_path:
        return 0
    document = Document(file_path)
    # Check line spacing for introduction, body, and conclusion
@@ -458,6 +518,9 @@ def evaluate_spacing(file_path):
 def check_italic_font_size_14(path1, path2):
    if not path1 or not path2:
        return 0
    if not compare_docx_files(path1, path2):
        return 0
    document = Document(path1)
@@ -471,6 +534,9 @@ def check_italic_font_size_14(path1, path2):
 def evaluate_alignment(docx_path):
    if not docx_path:
        return 0
    # Load the document
    doc = Document(docx_path)
@@ -500,6 +566,9 @@ def evaluate_alignment(docx_path):
 def get_unique_train_ids(initial_file):  # fixed standard
    if not initial_file:
        return set(), 0
    doc = Document(initial_file)
    train_ids = set()
    processed_lines = 0
@@ -516,6 +585,9 @@ def get_unique_train_ids(initial_file):  # fixed standard
 def check_no_duplicates(initial_file, processed_file):
    if not initial_file or not processed_file:
        return 0
    # Open the document
    train_ids_ini, ini_lines = get_unique_train_ids(initial_file)
    doc_processed = Document(processed_file)
@@ -543,6 +615,9 @@ def check_no_duplicates(initial_file, processed_file):
 def compare_docx_lines(file1, file2):
    if not file1 or not file2:
        return 0
    # Read the text of the document, line by line
    doc1 = Document(file1)
    doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
@@ -562,6 +637,9 @@ def compare_docx_lines(file1, file2):
 def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
    ignore_blanks = options.get('ignore_blanks', True)
    if not file1 or not file2:
        return 0
    # Determine file types and load documents
    if file1.endswith('.docx') and file2.endswith('.docx'):
        doc1 = Document(file1)
@@ -594,6 +672,9 @@ def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
 # Docx file saved in the ubuntu cannot use this function to compare highlight, don't know why, deprecated
 def compare_highlighted_text(file1, file2):
    if not file1 or not file2:
        return 0
    def extract_highlighted_text(file_path):
        highlighted_texts = []
@@ -631,6 +712,9 @@ def compare_highlighted_text(file1, file2):
 def compare_references(file1, file2, **options):
    if not file1 or not file2:
        return 0
    reference_indicator = options.get('reference_indicator', 'References')
    reference_base_result = options.get('reference_base_result', 0.5)
@@ -675,48 +759,3 @@ def compare_references(file1, file2, **options):
        return (result - reference_base_result) / (1 - reference_base_result)
    else:
        return 0
 def compare_answer(file1, file2, **options):
    """This is a specific function to compare the """
    # Determine file types and load documents
    if file1.endswith('.docx') and file2.endswith('.docx'):
        doc1 = Document(file1)
        doc2 = Document(file2)
        doc1_paragraphs = [p.text for p in doc1.paragraphs]
        doc2_paragraphs = [p.text for p in doc2.paragraphs]
    else:
        # Unsupported file types or mismatch
        print("Unsupported file types or mismatch between file types.")
        return 0
    # Find the references section in the paragraphs, find the idx of the last reference_indicator in the paragraph list
    ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1
    ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1
    if ref1_idx == -1 and ref2_idx == -1:
        return 1
    if ref1_idx == -1 or ref2_idx == -1:
        return 0
    # split the reference section into reference items, and remove the empty string items
    ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()]
    ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()]
    # Compare the references
    if len(ref1) != len(ref2):
        return 0
    total_similarity = 0
    for r1, r2 in zip(ref1, ref2):
        # fuzzy match the references
        similarity = fuzz.ratio(r1, r2) / 100.0
        total_similarity += similarity
    result = total_similarity / len(ref1)
    if result >= reference_base_result:
        return (result - reference_base_result) / (1 - reference_base_result)
    else:
        return 0
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -1,29 +1,30 @@
 import csv
 import datetime
 import difflib
 import functools
 import json
-import yaml
+import logging
 import operator
 import re
 import pdfplumber
 import sqlite3
 from numbers import Number
 from typing import Callable, Any, Union
 from typing import Dict, List, Pattern
-import datetime
+
 import pandas as pd
 import lxml.etree
 import pandas as pd
 import pdfplumber
 import yaml
 from docx import Document
 from lxml.cssselect import CSSSelector
 from lxml.etree import _Element
 from rapidfuzz import fuzz
 from docx import Document
 import difflib
 from .utils import _match_record, _match_value_to_rule
 import logging
 logger = logging.getLogger("desktopenv.metric.general")
 def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
    if result is None:
        return 0.
@@ -71,6 +72,7 @@ def is_in_list(result, rules) -> float:
    else:
        return 0.
 def diff_text_file(result: str, expect: str) -> float:
    if result is None:
        return 0.
@@ -81,6 +83,7 @@ def diff_text_file(result: str, expect: str) -> float:
        expected_lines: List[str] = f.read().splitlines()
    return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio()
 def fuzzy_match(result, rules) -> float:
    expect = rules["expected"]
@@ -88,7 +91,7 @@ def fuzzy_match(result, rules) -> float:
 def fuzzy_place_math(result_file_path, rules) -> float:
-    expect = rules["expected"] # a list of possible answers
+    expect = rules["expected"]  # a list of possible answers
    # read list.docx, and get all texts out, overlook blank lines, remove blanks before and after each line
    doc = Document(result_file_path)
    words_list = []
@@ -102,6 +105,7 @@ def fuzzy_place_math(result_file_path, rules) -> float:
            return 0
    return 1
 def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
    """
    Args:
@@ -209,10 +213,10 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
            return 0.
        if "text" in r:
-            match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
+            match_func: Callable[[str], Number] = functools.partial(operator.eq if r["exact"] \
-                                                                               else (lambda a, b: fuzz.ratio(a, b) / 100.)
+                                                                        else (lambda a, b: fuzz.ratio(a, b) / 100.)
-                                                                   , r["text"]
+                                                                    , r["text"]
-                                                                   )
+                                                                    )
            match_score: Number = 0
            for elm in elements:
                match_score = max(match_score, match_func(elm.text or None))
@@ -285,7 +289,7 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
    return float(metric)
-def check_direct_json_object(result, rules)->float:
+def check_direct_json_object(result, rules) -> float:
    """
    One of the most commonly used function to evalute.
    Compare two json objects directly.
@@ -320,7 +324,11 @@ def check_direct_json_object(result, rules)->float:
                return 0.
        return 1.0
 def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    if not speedtest_result_path:
        return 0
    # open the speedtest results file(csv)
    date_col = None
    with open(speedtest_result_path, 'r') as f:
@@ -333,12 +341,17 @@ def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
        for date in reader[date_col]:
            date_time = date[-5:]
            # compare the date time with the current date time, if time diff less than time_diff para, then return true
-            if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time, '%H:%M')).total_seconds()) / 60 < int(time_diff):
+            if not abs((datetime.datetime.strptime(date_time, '%H:%M') - datetime.datetime.strptime(now_date_time,
-                return False
+                                                                                                    '%H:%M')).total_seconds()) / 60 < int(
-        return True
+                    time_diff):
                return 0
        return 1
 def is_included_all_json_objects(gold_file_path, result_file_path):
    if not gold_file_path or not result_file_path:
        return 0
    print("gold_file_path: ")
    print(gold_file_path)
    print("result_file_path: ")
@@ -350,8 +363,8 @@ def is_included_all_json_objects(gold_file_path, result_file_path):
        result_json = json.load(fr)
    for key in gold_json.keys():
        if key not in result_json.keys() or gold_json[key] != result_json[key]:
-            return False
+            return 0
-    return True
+    return 1
 def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
@@ -373,31 +386,32 @@ def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
    if len(false_list) > 0:
        print("false_list: ")
        print(false_list)
-        return False
+        return 0
    else:
-        return True
+        return 1
 def file_contains(file_path, config):
+    # Return 1 when every string in config["expected"] occurs in the file's text, else 0.
    # file_path ends with .txt
-    if not file_path :
+    if not file_path:
-        return False
+        return 0  # no file to inspect is a failed check, not a pass
    with open(file_path, 'r') as f:
        file_text = f.read()
    for text in config["expected"]:
        if text not in file_text:
-            return False
+            return 0
-    return True
+    return 1
 def check_csv_line_number(file_path, line_number):
    # check file_path suffix
    if not file_path.endswith('.csv'):
-        return False
+        return 0
    # check line number
    with open(file_path, 'r') as f:
        reader = csv.reader(f)
        line_count = sum(1 for row in reader)
-    return True if line_count == int(line_number["expected"]) else False
+    return 1 if line_count == int(line_number["expected"]) else 0
 def compare_terminal_and_txt(txt_file_path, terminal_output):
@@ -405,7 +419,7 @@ def compare_terminal_and_txt(txt_file_path, terminal_output):
    with open(txt_file_path, 'r') as f:
        txt_file_content = f.read()
    # compare terminal output with txt file content
-    return True if terminal_output == txt_file_content else False
+    return 1 if terminal_output == txt_file_content else 0
 def compare_python_pure_text(py_file_path, gold_file_path):
@@ -414,13 +428,18 @@ def compare_python_pure_text(py_file_path, gold_file_path):
    print(py_file_path)
    print("gold_file_path: ")
    print(gold_file_path)
    # gold_file_path = gold_file_path.replace('.txt', '.py')
    def remove_whitespace(text):
        return ''.join(text.split())
    with open(py_file_path, 'r') as file1:
        content1 = file1.read()
    with open(gold_file_path, 'r') as file2:
        content2 = file2.read()
    content1_no_whitespace = remove_whitespace(content1)
    content2_no_whitespace = remove_whitespace(content2)
-    return content1_no_whitespace == content2_no_whitespace
+    if content1_no_whitespace == content2_no_whitespace:
        return 1
    else:
        return 0