Xiaochuan's multi-app examples
This commit is contained in:
@@ -2,7 +2,8 @@ from .basic_os import (
|
||||
check_gnome_favorite_apps,
|
||||
is_utc_0,
|
||||
check_text_enlarged,
|
||||
check_moved_jpgs
|
||||
check_moved_jpgs,
|
||||
is_in_vm_clickboard
|
||||
)
|
||||
from .chrome import (
|
||||
is_expected_tabs,
|
||||
@@ -18,7 +19,8 @@ from .chrome import (
|
||||
is_expected_search_query,
|
||||
is_expected_active_tab,
|
||||
is_expected_url_pattern_match,
|
||||
is_added_to_steam_cart
|
||||
is_added_to_steam_cart,
|
||||
is_expected_installed_extensions
|
||||
)
|
||||
from .docs import (
|
||||
compare_font_names,
|
||||
@@ -45,7 +47,8 @@ from .docs import (
|
||||
is_first_line_centered,
|
||||
check_file_exists,
|
||||
check_tabstops,
|
||||
compare_contains_image
|
||||
compare_contains_image,
|
||||
compare_docx_files_and_ignore_new_lines
|
||||
)
|
||||
from .general import (
|
||||
check_csv,
|
||||
@@ -57,7 +60,15 @@ from .general import (
|
||||
is_in_list,
|
||||
fuzzy_match,
|
||||
check_include_exclude,
|
||||
check_direct_json_object
|
||||
check_direct_json_object,
|
||||
compare_time_in_speedtest_results,
|
||||
is_included_all_json_objects,
|
||||
is_gold_text_included_in_pdf,
|
||||
check_csv_line_number,
|
||||
file_contains,
|
||||
compare_terminal_and_txt,
|
||||
fuzzy_place_math,
|
||||
compare_python_pure_text
|
||||
)
|
||||
from .gimp import (
|
||||
check_brightness_decrease_and_structure_sim,
|
||||
@@ -124,7 +135,7 @@ from .vscode import (
|
||||
check_json_settings,
|
||||
check_json_keybindings
|
||||
)
|
||||
|
||||
from .calc import compare_conference_city_in_order
|
||||
|
||||
def infeasible():
    """No-op placeholder metric; presumably marks a task as infeasible — confirm with callers."""
    pass
|
||||
|
||||
@@ -56,3 +56,15 @@ def check_moved_jpgs(directory_list, rule):
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
def is_in_vm_clickboard(config, terminal_output):
    """Return 1 when every expected snippet appears in *terminal_output*, else 0.

    ``config["expected"]`` may be a single string or a list of strings; each
    entry must occur as a substring of the captured terminal output.
    """
    print("terminal_output: ")
    print(terminal_output)
    print("config: ")
    print(config)
    expected = config["expected"]
    # Normalize the single-string case to a one-element list, then require
    # every snippet to be present in the output.
    snippets = expected if isinstance(expected, list) else [expected]
    found_all = all(snippet in terminal_output for snippet in snippets)
    return 1 if found_all else 0
|
||||
24
desktop_env/evaluators/metrics/calc.py
Normal file
24
desktop_env/evaluators/metrics/calc.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import openpyxl
|
||||
|
||||
def compare_conference_city_in_order(actual_city_list_path, expected_city):
    """Check that the cities in column C of an xlsx sheet match the expected list, in order.

    Args:
        actual_city_list_path: path to an .xlsx workbook; cities are read
            from the fixed range C2:C22 of the active sheet.
        expected_city: dict whose "expected" key holds the ordered city list.

    Returns:
        True when each read cell contains the corresponding expected city
        (substring match), False otherwise.
    """
    expected_city_list = expected_city["expected"]
    # NOTE(review): the file is an Excel workbook, not a csv, despite the log text.
    print(f"Reading csv file from {actual_city_list_path}")
    wb = openpyxl.load_workbook(actual_city_list_path)
    sheet = wb.active
    # Hard-coded range C2:C22 — assumes at most 21 conference rows; TODO confirm.
    actual_city_list = [cell.value for row in sheet["C2:C22"] for cell in row]
    print("expected_city_list:")
    print(expected_city_list)
    print("actual_city_list_path:")
    print(actual_city_list)
    wrong_list = []
    # Compare pairwise and stop at the shorter list: the original indexed the
    # expected list by the actual list's length and raised IndexError whenever
    # fewer than 21 cities were expected.
    for i, (expected, actual) in enumerate(zip(expected_city_list, actual_city_list)):
        # Empty cells come back as None; the original crashed on `x in None`.
        if actual is None or expected not in actual:
            wrong_list.append(i)
            print(f"Expected city {expected}; Actual city {actual}")
    return not wrong_list
|
||||
@@ -61,6 +61,12 @@ def is_expected_url_pattern_match(result, rules) -> float:
|
||||
return 1.
|
||||
|
||||
|
||||
def is_expected_installed_extensions(installed_extensions, expected) -> float:
    """Score whether the installed extensions equal the expected list exactly.

    Args:
        installed_extensions: extension identifiers found in the editor.
        expected: dict whose "expected" key holds the required extension list.

    Returns:
        1.0 on an exact `==` match, 0.0 otherwise — extra extensions fail.
    """
    print("installed_extensions: ")
    print(installed_extensions)
    expected_extensions = expected["expected"]
    # Must equal, no additional extensions allowed.  Return `1.` (not int `1`)
    # so both branches honor the declared ``float`` annotation.
    return 1. if expected_extensions == installed_extensions else 0.
|
||||
|
||||
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
|
||||
"""
|
||||
Checks if the expected tabs are open in Chrome.
|
||||
|
||||
28
desktop_env/evaluators/metrics/demo.py
Normal file
28
desktop_env/evaluators/metrics/demo.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import fitz # PyMuPDF
|
||||
|
||||
def extract_answers_from_pdf(pdf_file):
    """Extract the text after the last ``=`` on every non-blank line of a PDF.

    Args:
        pdf_file: path of the PDF to scan.

    Returns:
        List of answer strings, one per line that contains an ``=`` sign.
    """
    doc = fitz.open(pdf_file)
    answers = []
    try:
        # Walk every page and pull its plain text.
        for page in doc:
            text = page.get_text()
            for line in text.split('\n'):
                if line.strip():  # skip blank lines
                    # Split on '=' and keep the part after the last '=' as the answer.
                    parts = line.split('=')
                    if len(parts) > 1:
                        answers.append(parts[-1].strip())
    finally:
        # Always release the document handle — the original never closed it.
        doc.close()
    return answers
|
||||
|
||||
# Demo driver: extract answers from a sample PDF and print them.
# Guarded so importing this module no longer runs the demo as a side effect.
if __name__ == "__main__":
    # Hard-coded sample path from the author's machine — adjust as needed.
    pdf_file = '/Users/lxc/Desktop/calculus.pdf'
    answers = extract_answers_from_pdf(pdf_file)
    for i, answer in enumerate(answers, 1):
        print(f"题目{i}的答案是: {answer}")
|
||||
@@ -96,12 +96,19 @@ def compare_docx_files(file1, file2, **options):
|
||||
if text1 != text2:
|
||||
return 0
|
||||
else:
|
||||
print("ignore_blanks=false")
|
||||
if len(doc1_paragraphs) != len(doc2_paragraphs):
|
||||
print(doc1_paragraphs)
|
||||
print(doc2_paragraphs)
|
||||
print(len(doc1_paragraphs))
|
||||
print(len(doc2_paragraphs))
|
||||
return 0
|
||||
|
||||
print("in compare")
|
||||
# Compare each paragraph
|
||||
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
|
||||
if p1 != p2:
|
||||
print(p1)
|
||||
print(p2)
|
||||
return 0
|
||||
|
||||
return 1
|
||||
@@ -490,6 +497,39 @@ def compare_docx_lines(file1, file2):
|
||||
return 0
|
||||
|
||||
|
||||
def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
    """Compare two .docx files paragraph-by-paragraph, skipping empty paragraphs.

    With ``ignore_blanks`` (default True) all whitespace runs are collapsed to
    single spaces before comparison; otherwise paragraph texts are compared
    pairwise after a length check.

    Returns 1 when the documents match, 0 otherwise (also 0 for non-docx input).
    """
    ignore_blanks = options.get('ignore_blanks', True)

    # Guard clause: both inputs must be .docx files.
    if not (file1.endswith('.docx') and file2.endswith('.docx')):
        print("Unsupported file types or mismatch between file types.")
        return 0

    # Drop empty paragraphs up front, keeping only their text.
    paragraphs1 = [p.text for p in Document(file1).paragraphs if p.text != '']
    paragraphs2 = [p.text for p in Document(file2).paragraphs if p.text != '']

    if ignore_blanks:
        # Collapse every whitespace run to one space before comparing.
        normalized1 = re.sub(r'\s+', ' ', '\n'.join(paragraphs1)).strip()
        normalized2 = re.sub(r'\s+', ' ', '\n'.join(paragraphs2)).strip()
        return 0 if normalized1 != normalized2 else 1

    if len(paragraphs1) != len(paragraphs2):
        return 0
    # Compare each paragraph text pairwise.
    for left, right in zip(paragraphs1, paragraphs2):
        if left != right:
            return 0
    return 1
|
||||
|
||||
|
||||
# Docx file saved in the ubuntu cannot use this function to compare highlight, don't know why, deprecated
|
||||
def compare_highlighted_text(file1, file2):
|
||||
def extract_highlighted_text(file_path):
|
||||
|
||||
@@ -3,15 +3,18 @@ import functools
|
||||
import json
|
||||
import operator
|
||||
import re
|
||||
import pdfplumber
|
||||
import sqlite3
|
||||
from numbers import Number
|
||||
from typing import Callable, Any, Union
|
||||
from typing import Dict, List, Pattern
|
||||
|
||||
import datetime
|
||||
import pandas as pd
|
||||
import lxml.etree
|
||||
from lxml.cssselect import CSSSelector
|
||||
from lxml.etree import _Element
|
||||
from rapidfuzz import fuzz
|
||||
from docx import Document
|
||||
|
||||
from .utils import _match_record, _match_value_to_rule
|
||||
|
||||
@@ -46,13 +49,27 @@ def is_in_list(result, rules) -> float:
|
||||
return 0.
|
||||
|
||||
|
||||
|
||||
def fuzzy_match(result, rules) -> float:
    """Fuzzy-compare *result* against ``rules["expected"]``.

    Returns the rapidfuzz similarity ratio rescaled from 0-100 to 0.0-1.0.
    """
    target = rules["expected"]
    similarity = fuzz.ratio(result, target)
    return similarity / 100.
|
||||
|
||||
|
||||
def fuzzy_place_math(result_file_path, rules) -> float:
    """Check every word in a .docx answer file against the accepted answers.

    Args:
        result_file_path: path of the .docx file to inspect.
        rules: dict whose "expected" key holds a list of accepted answer
            substrings.

    Returns:
        1. when each extracted word contains at least one accepted answer
        substring, else 0. (floats, matching the declared annotation — the
        original returned plain ints).
    """
    expect = rules["expected"]  # a list of possible answers
    # Read the docx and split every paragraph into whitespace-separated words,
    # implicitly skipping blank lines.
    doc = Document(result_file_path)
    words_list = []
    for para in doc.paragraphs:
        words_list.extend(para.text.split())
    # Print the extracted word list (debug aid).
    print(words_list)
    for word in words_list:
        # Each word must contain at least one accepted answer substring.
        if not any(ans in word for ans in expect):
            print("Wrong place:", word)
            return 0.
    return 1.
|
||||
|
||||
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
|
||||
"""
|
||||
Args:
|
||||
@@ -227,15 +244,138 @@ def check_direct_json_object(result, rules)->float:
|
||||
One of the most commonly used function to evalute.
|
||||
Compare two json objects directly.
|
||||
"""
|
||||
if isinstance(result, str):
|
||||
# remove blanks before and after result
|
||||
result = result.strip()
|
||||
# replace all ' with "
|
||||
result = result.replace("'", '"')
|
||||
# load json object
|
||||
result = json.loads(result)
|
||||
print("result: ")
|
||||
print(result)
|
||||
print("expected: ")
|
||||
print(rules["expected"])
|
||||
if result is None:
|
||||
return 0.
|
||||
expected_json = rules["expected"]
|
||||
for key in expected_json.keys():
|
||||
expected_value = expected_json.get(key)
|
||||
if expected_value != result.get(key):
|
||||
return 0.
|
||||
return 1.0
|
||||
expect_in_result = rules.get("expect_in_result", False)
|
||||
if not expect_in_result:
|
||||
expected_json = rules["expected"]
|
||||
for key in expected_json.keys():
|
||||
expected_value = expected_json.get(key)
|
||||
if expected_value != result.get(key):
|
||||
return 0.
|
||||
return 1.0
|
||||
else:
|
||||
expected_json = rules["expected"]
|
||||
for key in expected_json.keys():
|
||||
expected_value = expected_json.get(key)
|
||||
if expected_value not in result.get(key):
|
||||
return 0.
|
||||
return 1.0
|
||||
|
||||
def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    """Check every test time in a speedtest csv is within *time_diff* minutes of now.

    Args:
        speedtest_result_path: path to a csv with a column whose name starts
            with 'TEST_DATE' and whose values end in an ``HH:MM`` time.
        time_diff: maximum allowed distance from the current wall-clock time,
            in minutes (anything int()-convertible).

    Returns:
        True when all rows fall inside the window; False otherwise, including
        when no 'TEST_DATE' column exists (the original raised KeyError then).
    """
    date_col = None
    with open(speedtest_result_path, 'r') as f:
        frame = pd.read_csv(f)
    for column in frame.columns:
        if column.startswith('TEST_DATE'):
            date_col = column
            break
    # Guard: missing column used to crash with frame[None].
    if date_col is None:
        return False
    now_date_time = datetime.datetime.now().strftime('%H:%M')
    now_parsed = datetime.datetime.strptime(now_date_time, '%H:%M')
    limit = int(time_diff)
    for date in frame[date_col]:
        # The last five characters hold the HH:MM timestamp.
        recorded = datetime.datetime.strptime(date[-5:], '%H:%M')
        raw_minutes = abs((recorded - now_parsed).total_seconds()) / 60
        # Wrap around midnight: 23:59 vs 00:01 is 2 minutes apart, not 1438.
        minutes_apart = min(raw_minutes, 1440 - raw_minutes)
        if not minutes_apart < limit:
            return False
    return True
|
||||
|
||||
|
||||
def is_included_all_json_objects(gold_file_path, result_file_path):
    """Return True when every key/value pair of the gold json file also exists in the result json file."""
    print("gold_file_path: ")
    print(gold_file_path)
    print("result_file_path: ")
    print(result_file_path)
    # Load both json documents from disk.
    with open(gold_file_path, 'r') as f:
        gold_json = json.load(f)
    with open(result_file_path, 'r') as fr:
        result_json = json.load(fr)
    # Every gold entry must be present in the result with an equal value.
    return all(
        key in result_json and gold_json[key] == result_json[key]
        for key in gold_json.keys()
    )
|
||||
|
||||
|
||||
def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
    """Check that every value of the gold json file occurs in the PDF's extracted text.

    Args:
        pdf_file_path: path of the PDF to search.
        gold_text_path: path of a json file; all of its values must appear as
            substrings of the PDF text.

    Returns:
        True when all gold values are found; False otherwise (missing keys
        are printed for debugging).
    """
    print("gold_text_path: ")
    print(gold_text_path)
    print("pdf_file_path: ")
    print(pdf_file_path)
    with open(gold_text_path, 'r') as f:
        gold_json = json.load(f)
    with pdfplumber.open(pdf_file_path) as pdf:
        text = ''
        for page in pdf.pages:
            # extract_text() returns None for pages without a text layer;
            # coerce to '' so the concatenation cannot raise TypeError.
            text += page.extract_text() or ''
    false_list = [key for key in gold_json.keys() if gold_json[key] not in text]
    if len(false_list) > 0:
        print("false_list: ")
        print(false_list)
        return False
    else:
        return True
|
||||
|
||||
|
||||
def file_contains(file_path, config):
    """Return True when every snippet in ``config["expected"]`` occurs in the file's text.

    A falsy *file_path* yields False.  NOTE(review): the original comment
    claims the path ends with .txt, but no suffix check is performed — confirm.
    """
    if not file_path:
        return False
    with open(file_path, 'r') as f:
        contents = f.read()
    # Every expected snippet must be a substring of the file contents.
    return all(snippet in contents for snippet in config["expected"])
|
||||
|
||||
def check_csv_line_number(file_path, line_number):
    """Return True when the csv at *file_path* has exactly ``line_number["expected"]`` rows.

    Paths without a .csv suffix fail immediately.  NOTE(review): relies on the
    ``csv`` module being imported at file level — not visible in this excerpt;
    confirm it exists.
    """
    # Reject anything that is not a csv file by suffix.
    if not file_path.endswith('.csv'):
        return False
    # Count rows as parsed by the csv reader (handles quoted newlines).
    with open(file_path, 'r') as f:
        row_total = sum(1 for _ in csv.reader(f))
    return row_total == int(line_number["expected"])
|
||||
|
||||
|
||||
def compare_terminal_and_txt(txt_file_path, terminal_output):
    """Return True when *terminal_output* equals the txt file's contents exactly."""
    # Read the reference text from disk.
    with open(txt_file_path, 'r') as f:
        expected_text = f.read()
    # Strict equality: any whitespace or newline difference fails the check.
    return terminal_output == expected_text
|
||||
|
||||
|
||||
def compare_python_pure_text(py_file_path, gold_file_path):
    """Return True when the two files are identical after removing ALL whitespace.

    Both paths are printed for debugging.  The comparison ignores every space,
    tab and newline, so formatting-only differences do not matter.
    """
    print("py_file_path: ")
    print(py_file_path)
    print("gold_file_path: ")
    print(gold_file_path)

    def squash(text):
        # Strip every whitespace character, leaving only the visible tokens.
        return ''.join(text.split())

    with open(py_file_path, 'r') as source_file:
        source_text = source_file.read()
    with open(gold_file_path, 'r') as gold_file:
        gold_text = gold_file.read()
    # Compare the whitespace-free contents.
    return squash(source_text) == squash(gold_text)
|
||||
@@ -1,10 +1,12 @@
|
||||
import builtins
|
||||
import datetime
|
||||
import functools
|
||||
import itertools
|
||||
import logging
|
||||
import operator
|
||||
import re
|
||||
import zipfile
|
||||
import pandas as pd
|
||||
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
|
||||
from typing import Dict, List, Set, Match, Tuple, Pattern
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
Reference in New Issue
Block a user