Update multi-apps examples

2024-03-07 22:15:08 +08:00
parent fd9f6cbc59
commit 1aa2a43908
7 changed files with 445 additions and 59 deletions
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -11,9 +11,9 @@ from docx.shared import RGBColor
 from odf.opendocument import load
 from odf.text import P
 from odf.text import Span
+from rapidfuzz import fuzz
 from skimage.color import deltaE_ciede2000
 from skimage.color import rgb2lab
-from rapidfuzz import fuzz

 logger = logging.getLogger("desktopenv.metric.docs")

@@ -173,9 +173,11 @@ def compare_docx_tables(docx_file1, docx_file2):

    return 1

+
 from io import BytesIO
 from PIL import Image

+
 def compare_docx_images(docx_file1, docx_file2):
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
@@ -187,7 +189,7 @@ def compare_docx_images(docx_file1, docx_file2):
                img_data = rel.target_part.blob
                images.append(BytesIO(img_data))
        return images
-    
+
    images1 = extract_images(doc1)
    images2 = extract_images(doc2)
    if len(images1) != len(images2):
@@ -197,8 +199,10 @@ def compare_docx_images(docx_file1, docx_file2):
            return 0
    return 1

+
 import pytesseract

+
 def compare_image_text(image_path, rule):
    img = Image.open(image_path)
    img_text = pytesseract.image_to_string(img)
@@ -207,6 +211,7 @@ def compare_image_text(image_path, rule):
    else:
        raise ValueError("Unsupported rule type")

+
 def compare_line_spacing(docx_file1, docx_file2):
    if not compare_docx_files(docx_file1, docx_file2):
        return 0
@@ -313,7 +318,7 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
    section = doc2.sections[0]
    paragraph_width = section.page_width - section.left_margin - section.right_margin
    ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (
-                x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
+            x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
    minus = .0
    for p1, p2 in zip(para1, para2):
        # filter CLEAR tabstop and default left-0 tabstop
@@ -583,3 +588,95 @@ def compare_highlighted_text(file1, file2):
        return 1
    else:
        return 0
+
+
+def compare_references(file1, file2, **options):
+    """Score how closely the reference lists of two .docx files match.
+
+    The reference section is every non-blank paragraph after the first
+    paragraph whose text equals ``reference_indicator``.
+
+    Options:
+        reference_indicator: heading text of the section (default 'References').
+        reference_base_result: mean similarity in [0, 1) that maps to score 0
+            (default 0.5); higher means are rescaled linearly up to 1.
+
+    Returns 1 if neither file has the section or both sections are empty, 0 if
+    the files are not both .docx, only one has the section, the item counts
+    differ or the mean similarity is below the base; else the rescaled score.
+    """
+    reference_indicator = options.get('reference_indicator', 'References')
+    reference_base_result = options.get('reference_base_result', 0.5)
+
+    if not (file1.endswith('.docx') and file2.endswith('.docx')):
+        logger.warning("Unsupported file types or mismatch between file types.")
+        return 0
+
+    def reference_items(docx_file):
+        """Return the non-blank paragraphs after the indicator, or None if absent."""
+        paragraphs = [p.text for p in Document(docx_file).paragraphs]
+        if reference_indicator not in paragraphs:
+            return None
+        start = paragraphs.index(reference_indicator) + 1
+        return [p for p in paragraphs[start:] if p.strip()]
+
+    ref1, ref2 = reference_items(file1), reference_items(file2)
+    if ref1 is None and ref2 is None:
+        return 1
+    if ref1 is None or ref2 is None or len(ref1) != len(ref2):
+        return 0
+    if not ref1:
+        # Both sections exist but are empty: identical, and avoids dividing by zero.
+        return 1
+
+    result = sum(fuzz.ratio(r1, r2) / 100.0 for r1, r2 in zip(ref1, ref2)) / len(ref1)
+    if result < reference_base_result:
+        return 0
+    if reference_base_result >= 1:
+        return 1  # a perfect match met a base of 1; rescaling would divide by zero
+    return (result - reference_base_result) / (1 - reference_base_result)
+
+
+def compare_answer(file1, file2, **options):
+    """Compare the reference sections of two .docx files.
+
+    NOTE(review): the original docstring was left unfinished and the body is the
+    same logic as compare_references; confirm the intended behaviour.
+
+    Options:
+        reference_indicator: heading text of the section (default 'References').
+        reference_base_result: mean similarity in [0, 1) that maps to score 0
+            (default 0.5); higher means are rescaled linearly up to 1.
+
+    Returns 1 when neither file has the section or both sections are empty, 0 on
+    a mismatch or a mean similarity below the base, else the rescaled score.
+    """
+    reference_indicator = options.get('reference_indicator', 'References')
+    reference_base_result = options.get('reference_base_result', 0.5)
+
+    if not (file1.endswith('.docx') and file2.endswith('.docx')):
+        logger.warning("Unsupported file types or mismatch between file types.")
+        return 0
+
+    def reference_items(docx_file):
+        """Return the non-blank paragraphs after the indicator, or None if absent."""
+        paragraphs = [p.text for p in Document(docx_file).paragraphs]
+        if reference_indicator not in paragraphs:
+            return None
+        start = paragraphs.index(reference_indicator) + 1
+        return [p for p in paragraphs[start:] if p.strip()]
+
+    ref1, ref2 = reference_items(file1), reference_items(file2)
+    if ref1 is None and ref2 is None:
+        return 1  # neither file has the section
+    if ref1 is None or ref2 is None or len(ref1) != len(ref2):
+        return 0  # section missing on one side, or different item counts
+    if not ref1:
+        return 1  # both sections empty; also avoids dividing by zero below
+
+    result = sum(fuzz.ratio(r1, r2) / 100.0 for r1, r2 in zip(ref1, ref2)) / len(ref1)
+    if result < reference_base_result:
+        return 0
+    if reference_base_result >= 1:
+        return 1  # perfect match against a base of 1; rescaling would divide by zero
+    return (result - reference_base_result) / (1 - reference_base_result)
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -16,9 +16,9 @@ from openpyxl.utils import get_column_letter
 from openpyxl.worksheet.datavalidation import DataValidation
 from openpyxl.worksheet.worksheet import Worksheet

-from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
-from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\
-                 , load_filters, load_pivot_tables
+from desktop_env.evaluators.metrics.utils import _match_value_to_rule, _read_cell_style, read_cell_value
+from desktop_env.evaluators.metrics.utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles \
+    , load_filters, load_pivot_tables
 from rapidfuzz import fuzz

 # from openpyxl.utils import coordinate_to_tuple