add multi-apps 5 examples by ruisheng 2024-03-06

2024-03-06 21:20:26 +08:00
parent 69ef653a7c
commit da0dafc32c
14 changed files with 645 additions and 15 deletions
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -60,7 +60,8 @@ from .general import (
    fuzzy_match,
    check_include_exclude,
    check_direct_json_object,
-    diff_text_file
+    diff_text_file,
+    literal_match
 )
 from .gimp import (
    check_brightness_decrease_and_structure_sim,
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -58,6 +58,8 @@ def contains_page_break(docx_file):

 def compare_docx_files(file1, file2, **options):
    ignore_blanks = options.get('ignore_blanks', True)
+    ignore_case = options.get('ignore_case', False)
+    ignore_order = options.get('ignore_order', False)
    content_only = options.get('content_only', False)

    def get_paragraph_texts_odt(document):
@@ -82,11 +84,17 @@ def compare_docx_files(file1, file2, **options):
        doc2 = Document(file2)
        doc1_paragraphs = [p.text for p in doc1.paragraphs]
        doc2_paragraphs = [p.text for p in doc2.paragraphs]
+        if ignore_order:
+            doc1_paragraphs = sorted(doc1_paragraphs)
+            doc2_paragraphs = sorted(doc2_paragraphs)
    elif file1.endswith('.odt') and file2.endswith('.odt'):
        doc1 = load(file1)
        doc2 = load(file2)
        doc1_paragraphs = get_paragraph_texts_odt(doc1)
        doc2_paragraphs = get_paragraph_texts_odt(doc2)
+        if ignore_order:
+            doc1_paragraphs = sorted(doc1_paragraphs)
+            doc2_paragraphs = sorted(doc2_paragraphs)
    else:
        # Unsupported file types or mismatch
        print("Unsupported file types or mismatch between file types.")
@@ -96,6 +104,8 @@ def compare_docx_files(file1, file2, **options):
        # Compare the content of the documents
        text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
        text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
+        if ignore_case:
+            text1, text2 = text1.lower(), text2.lower()
        similarity = fuzz.ratio(text1, text2) / 100.0
        return similarity

@@ -103,6 +113,8 @@ def compare_docx_files(file1, file2, **options):
    if ignore_blanks:
        text1 = re.sub(r'\s+', ' ', '\n'.join(doc1_paragraphs)).strip()
        text2 = re.sub(r'\s+', ' ', '\n'.join(doc2_paragraphs)).strip()
+        if ignore_case:
+            text1, text2 = text1.lower(), text2.lower()
        if text1 != text2:
            return 0
    else:
@@ -111,6 +123,8 @@ def compare_docx_files(file1, file2, **options):

        # Compare each paragraph
        for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
+            if ignore_case:
+                p1, p2 = p1.lower(), p2.lower()
            if p1 != p2:
                return 0

--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -39,6 +39,24 @@ def exact_match(result, rules) -> float:
    else:
        return 0.

+
+def literal_match(result: Any, expected: Any, **options) -> float:
+    """Return 1.0 if `result` literally matches `expected` (compared via str()), otherwise 0.0.
+
+    Options: `type` is 'str' (default) or 'list' (both list/tuple of equal length, compared item by item);
+    `ignore_case` (default False) lowercases both sides. Any other `type` raises NotImplementedError."""
+    literal_type = options.get('type', 'str')
+    ignore_case = options.get('ignore_case', False)
+    normalize = (lambda value: str(value).lower()) if ignore_case else str
+    if literal_type == 'str':
+        return float(normalize(result) == normalize(expected))
+    if literal_type == 'list':
+        if not (isinstance(result, (list, tuple)) and isinstance(expected, (list, tuple))) or len(result) != len(expected):
+            return 0.0
+        return float([normalize(item) for item in result] == [normalize(item) for item in expected])
+    raise NotImplementedError(f"Type {literal_type} not supported")  # report the option value, not the builtin `type`
+
+
 def is_in_list(result, rules) -> float:
    expect = rules["expected"]
    if expect in result: