This commit is contained in:
tsuky_chen
2024-03-09 18:53:27 +08:00
48 changed files with 2106 additions and 155 deletions

View File

@@ -24,6 +24,7 @@ from .chrome import (
get_gotoRecreationPage_and_get_html_content,
get_url_dashPart,
get_active_url_from_accessTree,
get_find_installed_extension_name,
get_info_from_website
)
from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file
@@ -31,7 +32,8 @@ from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command
from .gimp import get_gimp_config_file
from .impress import get_audio_in_slide, get_background_image_in_slide
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime
from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime, get_time_diff_range
from .replay import get_replay
from .vlc import get_vlc_playing_info, get_vlc_config, get_default_video_player
from .vscode import get_vscode_config
from .calc import get_conference_city_in_order

View File

@@ -0,0 +1,15 @@
import csv
# Read a CSV file and return every value in the third column, in row order.
def get_conference_city_in_order(env, config):
    """Read the CSV file named in config['csv_path'] and return the values of
    the third column in row order, excluding the header row.

    Args:
        env: environment handle (unused here, kept for the getter interface).
        config: dict with key 'csv_path' pointing at the CSV file.

    Returns:
        list[str]: third-column values, one per data row.
    """
    csv_path = config['csv_path']
    print(f"Reading csv file from {csv_path}")
    with open(csv_path, 'r') as handle:
        rows = csv.reader(handle)
        next(rows)  # drop the header row
        return [record[2] for record in rows]

View File

@@ -4,6 +4,7 @@ import os
import platform
import sqlite3
import time
from urllib.parse import unquote
from typing import Dict, Any, List
from urllib.parse import urlparse, parse_qs
@@ -1010,6 +1011,43 @@ def get_find_unpacked_extension_path(env, config: Dict[str, str]):
return "Google"
def get_find_installed_extension_name(env, config: Dict[str, str]):
    """Return the names of every extension installed in the VM's Chrome/Chromium.

    Resolves the platform-specific path of Chrome's "Preferences" JSON file by
    running a small Python snippet inside the VM, fetches that file through the
    controller, and collects each installed extension's manifest name.

    Args:
        env: environment handle; provides ``vm_platform`` and ``controller``.
        config: unused here — kept so this getter matches the common interface.

    Returns:
        list[str]: all installed extension names on success, or the string
        "Google" when the preferences file cannot be read or parsed (error
        sentinel — TODO confirm callers expect this rather than an empty list).

    Raises:
        Exception: when the VM platform is not Windows, Darwin, or Linux.
    """
    os_type = env.vm_platform
    if os_type == 'Windows':
        preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
    elif os_type == 'Darwin':
        preference_file_path = env.controller.execute_python_command(
            "import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
            'output'].strip()
    elif os_type == 'Linux':
        # snap-packaged Chromium (common on ARM) keeps its profile under ~/snap
        if "arm" in platform.machine():
            preference_file_path = env.controller.execute_python_command(
                "import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))")[
                'output'].strip()
        else:
            preference_file_path = env.controller.execute_python_command(
                "import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
                'output'].strip()
    else:
        raise Exception('Unsupported operating system')
    try:
        content = env.controller.get_file(preference_file_path)
        data = json.loads(content)
        # The Preferences JSON lists every installed extension under
        # extensions.settings; return all manifest names and let the metric
        # match against the targeted extension.
        all_extensions_name = []
        all_extensions = data.get('extensions', {}).get('settings', {})
        for id in all_extensions.keys():
            name = all_extensions[id]["manifest"]["name"]
            all_extensions_name.append(name)
        return all_extensions_name
    except Exception as e:
        logger.error(f"Error: {e}")
        return "Google"
def get_data_delete_automacally(env, config: Dict[str, str]):
"""
This function is used to open th "auto-delete" mode of chromium
@@ -1037,8 +1075,8 @@ def get_data_delete_automacally(env, config: Dict[str, str]):
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
data_delete_state = data["profile"]["exit_type"]
return data_delete_state
data_delete_state = data["profile"].get("default_content_setting_values", None)
return "true" if data_delete_state is not None else "false"
except Exception as e:
logger.error(f"Error: {e}")
return "Google"
@@ -1077,6 +1115,7 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
"""
active_tab_url = get_active_url_from_accessTree(env, config)
if not isinstance(active_tab_url, str):
logger.error("active_tab_url is not a string")
return None
host = env.vm_ip
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
@@ -1109,12 +1148,14 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
for context in browser.contexts:
for page in context.pages:
page.wait_for_load_state("networkidle")
if page.url == active_tab_url:
# the accTree and playwright can get encoding(percent-encoding) characters, we need to convert them to normal characters
if unquote(page.url) == unquote(active_tab_url):
target_page = page
print("tartget page url: ", target_page.url)
print("tartget page title: ", target_page.title())
print("\33[32mtartget page url: ", target_page.url, "\33[0m")
print("\33[32mtartget page title: ", target_page.title(), "\33[0m")
break
if target_page is None:
logger.error("Your tab is not the target tab.")
return {}
return_json = {}
if config["category"] == "class":

View File

@@ -1,6 +1,7 @@
import os
from typing import Dict, List, Set
from typing import Optional, Any, Union
from datetime import datetime
import requests
import pandas as pd
@@ -77,21 +78,37 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
gives (List[int]): optional. defaults to [0]. which files are directly
returned to the metric. if len==1, str is returned; else, list is
returned.
only support for single file now:
time_suffix(bool): optional. defaults to False. if True, append the current time in required format.
time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix.
"""
time_format = "%Y_%m_%d"
if not config.get("multi", False):
paths: List[str] = [config["path"]]
dests: List[str] = [config["dest"]]
print(config)
if "time_suffix" in config.keys() and config["time_suffix"]:
if "time_format" in config.keys():
time_format = config["time_format"]
# Insert time before . in file type suffix
paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
else:
paths: List[str] = config["path"]
dests: List[str] = config["dest"]
print(paths)
print(dests)
cache_paths: List[str] = []
gives: Set[int] = set(config.get("gives", [0]))
for i, (p, d) in enumerate(zip(paths, dests)):
print("env cache_dir: ")
print(env.cache_dir)
_path = os.path.join(env.cache_dir, d)
print("_path: ")
print(_path)
file = env.controller.get_file(p)
if file is None:
#return None
@@ -104,7 +121,9 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
cache_paths.append(_path)
with open(_path, "wb") as f:
f.write(file)
# debug
print("cache_paths")
print(cache_paths)
return cache_paths[0] if len(cache_paths)==1 else cache_paths

View File

@@ -195,3 +195,10 @@ def get_accessibility_tree(env, *args) -> str:
accessibility_tree: str = env.controller.get_accessibility_tree()
logger.debug("AT@eval: %s", accessibility_tree)
return accessibility_tree
def get_time_diff_range(env, config) -> str:
    """Return the allowed time difference (in minutes) configured for a metric.

    Args:
        env: environment handle (unused).
        config: dict expected to contain 'diff_range_in_minutes'.

    Returns:
        The configured value, or None when the key is absent.
    """
    try:
        return config["diff_range_in_minutes"]
    # Fix: narrowed from a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit and hid unrelated bugs.
    except (KeyError, TypeError):
        logger.error("diff_range_in_minutes not found in config.")
        return None

View File

@@ -2,7 +2,8 @@ from .basic_os import (
check_gnome_favorite_apps,
is_utc_0,
check_text_enlarged,
check_moved_jpgs
check_moved_jpgs,
is_in_vm_clickboard
)
from .chrome import (
is_expected_tabs,
@@ -19,6 +20,7 @@ from .chrome import (
is_expected_active_tab,
is_expected_url_pattern_match,
is_added_to_steam_cart,
is_expected_installed_extensions,
compare_pdf_images
)
from .docs import (
@@ -47,6 +49,7 @@ from .docs import (
check_file_exists,
check_tabstops,
compare_contains_image,
compare_docx_files_and_ignore_new_lines,
compare_docx_images,
compare_image_text,
compare_references
@@ -62,6 +65,14 @@ from .general import (
fuzzy_match,
check_include_exclude,
check_direct_json_object,
compare_time_in_speedtest_results,
is_included_all_json_objects,
is_gold_text_included_in_pdf,
check_csv_line_number,
file_contains,
compare_terminal_and_txt,
fuzzy_place_math,
compare_python_pure_text,
diff_text_file,
literal_match
)
@@ -69,7 +80,7 @@ from .gimp import (
check_brightness_decrease_and_structure_sim,
check_contrast_increase_and_structure_sim,
check_saturation_increase_and_structure_sim,
check_image_size_and_structure_sim,
check_image_size,
check_image_mirror,
check_palette_and_structure_sim,
check_textbox_on_leftside,
@@ -82,7 +93,9 @@ from .gimp import (
increase_saturation,
decrease_brightness,
check_file_exists,
compare_triangle_positions
compare_triangle_positions,
check_sharper,
check_image_file_size
)
from .libreoffice import check_libre_locale
from .pdf import check_pdf_pages
@@ -126,13 +139,16 @@ from .vscode import (
compare_text_file,
compare_config,
compare_answer,
compare_result_files,
is_extension_installed,
check_json_settings,
check_json_keybindings,
check_python_file_by_test_suite,
check_python_file_by_gold_file,
check_html_background_image,
compare_zip_files
)
from .calc import compare_conference_city_in_order
from .others import compare_epub, check_mp3_meta
def infeasible():

View File

@@ -56,3 +56,15 @@ def check_moved_jpgs(directory_list, rule):
return 1
else:
return 0
def is_in_vm_clickboard(config, terminal_output):
    """Return 1 when every expected string from config['expected'] occurs in
    the terminal output, else 0. A scalar expectation is checked directly."""
    print("terminal_output: ")
    print(terminal_output)
    print("config: ")
    print(config)
    expected_results = config["expected"]
    if isinstance(expected_results, list):
        found_all = all(item in terminal_output for item in expected_results)
        return 1 if found_all else 0
    return 1 if expected_results in terminal_output else 0

View File

@@ -0,0 +1,27 @@
import openpyxl
def compare_conference_city_in_order(actual_city_list_path, expected_city):
    """Check that cells C2:C22 of the workbook match the expected city list in order.

    A match means each expected city name is a substring of the corresponding
    actual cell, index by index.

    Args:
        actual_city_list_path: path to the .xlsx workbook produced by the task.
        expected_city: dict with key 'expected' holding the ordered city list.

    Returns:
        bool: True when every position matches, False otherwise.
    """
    expected_city_list = expected_city["expected"]
    print(f"Reading csv file from {actual_city_list_path}")
    wb = openpyxl.load_workbook(actual_city_list_path)
    sheet = wb.active
    actual_city_list = []
    # NOTE(review): range is hard-coded to rows 2-22 — presumably the task
    # always produces 21 data rows; confirm before generalizing.
    for row in sheet["C2:C22"]:
        for cell in row:
            actual_city_list.append(cell.value)
    print("expected_city_list:")
    print(expected_city_list)
    # Fix: the label previously said "actual_city_list_path" while printing the list.
    print("actual_city_list:")
    print(actual_city_list)
    wrong_list = []
    try:
        for i in range(len(actual_city_list)):
            if expected_city_list[i] not in actual_city_list[i]:
                wrong_list.append(i)
                print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
    # Fix: narrowed from a bare `except:`.
    # IndexError: more actual rows than expected entries;
    # TypeError: an empty (None) cell — both count as a mismatch.
    except (IndexError, TypeError):
        return False
    return len(wrong_list) == 0

View File

@@ -2,9 +2,9 @@ import logging
import os
import re
import shutil
from itertools import product
from typing import Any, Dict, List, Union
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz
from bs4 import BeautifulSoup, Tag
@@ -61,6 +61,12 @@ def is_expected_url_pattern_match(result, rules) -> float:
return 1.
def is_expected_installed_extensions(installed_extensions, expected) -> float:
    """Return 1. iff the installed extension names exactly equal the expected
    list — no additional extensions are allowed.

    Args:
        installed_extensions: list of extension names found in the VM.
        expected: dict with key 'expected' holding the required name list.
    """
    print("installed_extensions: ")
    print(installed_extensions)
    expected_extensions = expected["expected"]
    # Fix: success branch returned int 1 while the failure branch returned
    # float 0.; return float on both branches to honour the -> float contract.
    return 1. if expected_extensions == installed_extensions else 0.
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""
Checks if the expected tabs are open in Chrome.
@@ -94,12 +100,24 @@ def is_expected_bookmarks(bookmarks: List[str], rule: Dict[str, Any]) -> float:
elif rule['type'] == "liked_authors_websites_urls":
# Check if "liked authors" folder exists
liked_authors_folder = next((bookmark for bookmark in bookmarks['bookmark_bar']['children'] if
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
bookmark['type'] == 'folder' and bookmark['name'] == 'Liked Authors'), None)
if liked_authors_folder:
# Check if it contains the specified URLs
liked_authors_urls = [bookmark['url'] for bookmark in liked_authors_folder['children'] if
bookmark['type'] == 'url']
return 1. if set(liked_authors_urls) == set(rule['urls']) else 0.
urls = rule['urls']
for idx, url in enumerate(urls):
if isinstance(url, str):
urls[idx] = [url]
combinations = product(*urls)
for combination in combinations:
if set(combination) == set(liked_authors_urls):
return 1.
return 0.
else:
return 0.
else:
@@ -140,15 +158,16 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path)
import fitz
from PIL import Image
from io import BytesIO
from borb.pdf import Document
from borb.pdf import PDF
from pathlib import Path
import typing
def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
def extract_images_from_pdf(pdf_path):
pdf_document = fitz.open(pdf_path)
@@ -163,14 +182,14 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
images.append(img)
return images
def fix_pdf(in_path: Path, out_path: Path) -> None:
doc: typing.Optional[Document] = None
with open(in_path, "rb") as fh:
doc = PDF.loads(fh)
with open(out_path, "wb") as fh:
PDF.dumps(fh, doc)
fix_pdf(Path(pdf1_path), Path(pdf1_path))
fix_pdf(Path(pdf2_path), Path(pdf2_path))
@@ -183,7 +202,7 @@ def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float:
for img1, img2 in zip(images1, images2):
if img1.tobytes() != img2.tobytes():
return 0.
return 1.

View File

@@ -0,0 +1,28 @@
import fitz # PyMuPDF
def extract_answers_from_pdf(pdf_file):
    """Extract an "answer" from every non-blank line of a PDF.

    A line containing '=' contributes the stripped text after its last '='.

    Args:
        pdf_file: path to the PDF document.

    Returns:
        list[str]: answers in document order.
    """
    doc = fitz.open(pdf_file)
    answers = []
    for page in doc:
        page_text = page.get_text()
        for line in page_text.split('\n'):
            if not line.strip():
                continue  # skip blank lines
            pieces = line.split('=')
            if len(pieces) > 1:
                # the text after the last '=' is taken as the answer
                answers.append(pieces[-1].strip())
    return answers
# Fix: this demo ran at import time against a hard-coded developer path,
# breaking any module that imports this file. Guard it as a script entry.
if __name__ == "__main__":
    pdf_file = '/Users/lxc/Desktop/calculus.pdf'
    answers = extract_answers_from_pdf(pdf_file)
    for i, answer in enumerate(answers, 1):
        print(f"题目{i}的答案是: {answer}")

View File

@@ -3,8 +3,10 @@ import os
import re
import xml.etree.ElementTree as ET
import zipfile
from io import BytesIO
from typing import List, Dict, Any
from PIL import Image
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
from docx.shared import RGBColor
@@ -23,6 +25,9 @@ def find_default_font(config_file_path, rules):
default_font = None
expected_font = rules["font_name"]
if not config_file_path:
return 0
try:
tree = ET.parse(config_file_path)
root = tree.getroot()
@@ -42,6 +47,9 @@ def find_default_font(config_file_path, rules):
def contains_page_break(docx_file):
if not docx_file:
return 0
doc = Document(docx_file)
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
@@ -62,6 +70,9 @@ def compare_docx_files(file1, file2, **options):
ignore_order = options.get('ignore_order', False)
content_only = options.get('content_only', False)
if not file1 or not file2:
return 0
def get_paragraph_texts_odt(document):
paragraphs = document.getElementsByType(P)
paragraph_texts = []
@@ -118,20 +129,30 @@ def compare_docx_files(file1, file2, **options):
if text1 != text2:
return 0
else:
print("ignore_blanks=false")
if len(doc1_paragraphs) != len(doc2_paragraphs):
print(doc1_paragraphs)
print(doc2_paragraphs)
print(len(doc1_paragraphs))
print(len(doc2_paragraphs))
return 0
print("in compare")
# Compare each paragraph
for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs):
if ignore_case:
p1, p2 = p1.lower(), p2.lower()
if p1 != p2:
print(p1)
print(p2)
return 0
return 1
def compare_init_lines(file1, file2):
if not file1 or not file2:
return 0
doc1 = Document(file1)
doc2 = Document(file2)
@@ -149,6 +170,9 @@ def compare_init_lines(file1, file2):
def compare_docx_tables(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -174,11 +198,10 @@ def compare_docx_tables(docx_file1, docx_file2):
return 1
from io import BytesIO
from PIL import Image
def compare_docx_images(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -212,6 +235,9 @@ def compare_image_text(image_path, rule):
def compare_line_spacing(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
if not compare_docx_files(docx_file1, docx_file2):
return 0
doc1 = Document(docx_file1)
@@ -233,6 +259,9 @@ def compare_line_spacing(docx_file1, docx_file2):
def compare_insert_equation(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
if not compare_docx_files(docx_file1, docx_file2):
return 0
@@ -248,6 +277,9 @@ def compare_insert_equation(docx_file1, docx_file2):
def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
if not docx_file:
return 0
doc = Document(docx_file)
expected_font = rules["font_name"]
@@ -260,6 +292,9 @@ def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
def compare_subscript_contains(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -272,6 +307,9 @@ def compare_subscript_contains(docx_file1, docx_file2):
def has_page_numbers_in_footers(docx_file):
if not docx_file:
return 0
doc = Document(docx_file)
for section in doc.sections:
@@ -286,6 +324,9 @@ def has_page_numbers_in_footers(docx_file):
def is_first_line_centered(docx_file):
if not docx_file:
return 0
doc = Document(docx_file)
first_paragraph = doc.paragraphs[0]
@@ -294,11 +335,16 @@ def is_first_line_centered(docx_file):
def check_file_exists(directory, filename):
if not directory or not filename:
return 0
file_path = os.path.join(directory, filename)
return 1 if os.path.isfile(file_path) else 0
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
if not docx_file1 or not docx_file2:
return .0
doc1: Document = Document(docx_file1)
doc2: Document = Document(docx_file2)
para1 = [p for p in doc1.paragraphs if p.text.strip()]
@@ -334,6 +380,9 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
def compare_contains_image(docx_file1, docx_file2):
if not docx_file1 or not docx_file2:
return 0
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -346,6 +395,9 @@ def compare_contains_image(docx_file1, docx_file2):
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
@@ -380,6 +432,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
def check_highlighted_words(file_path1, file_path2):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
@@ -402,6 +457,9 @@ def check_highlighted_words(file_path1, file_path2):
def evaluate_strike_through_last_paragraph(file_path1, file_path2):
if not file_path1 or not file_path2:
return 0
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
@@ -418,6 +476,9 @@ def evaluate_strike_through_last_paragraph(file_path1, file_path2):
def evaluate_conversion(file_path):
if not file_path:
return 0
document = Document(file_path)
for table in document.tables:
@@ -437,6 +498,9 @@ def evaluate_conversion(file_path):
def evaluate_spacing(file_path):
if not file_path:
return 0
document = Document(file_path)
# Check line spacing for introduction, body, and conclusion
@@ -450,6 +514,9 @@ def evaluate_spacing(file_path):
def check_italic_font_size_14(path1, path2):
if not path1 or not path2:
return 0
if not compare_docx_files(path1, path2):
return 0
document = Document(path1)
@@ -463,6 +530,9 @@ def check_italic_font_size_14(path1, path2):
def evaluate_alignment(docx_path):
if not docx_path:
return 0
# Load the document
doc = Document(docx_path)
@@ -492,6 +562,9 @@ def evaluate_alignment(docx_path):
def get_unique_train_ids(initial_file): # fixed standard
if not initial_file:
return set(), 0
doc = Document(initial_file)
train_ids = set()
processed_lines = 0
@@ -508,6 +581,9 @@ def get_unique_train_ids(initial_file): # fixed standard
def check_no_duplicates(initial_file, processed_file):
if not initial_file or not processed_file:
return 0
# Open the document
train_ids_ini, ini_lines = get_unique_train_ids(initial_file)
doc_processed = Document(processed_file)
@@ -535,6 +611,9 @@ def check_no_duplicates(initial_file, processed_file):
def compare_docx_lines(file1, file2):
if not file1 or not file2:
return 0
# Read the text of the document, line by line
doc1 = Document(file1)
doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
@@ -551,8 +630,47 @@ def compare_docx_lines(file1, file2):
return 0
def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
    """Compare two .docx files paragraph by paragraph, dropping empty paragraphs.

    Options:
        ignore_blanks (bool, default True): collapse all whitespace runs before
            comparing the joined text instead of comparing paragraph lists.

    Returns:
        int: 1 when the documents match, 0 otherwise (also 0 for missing paths
        or non-.docx inputs).
    """
    ignore_blanks = options.get('ignore_blanks', True)
    if not file1 or not file2:
        return 0
    if not (file1.endswith('.docx') and file2.endswith('.docx')):
        print("Unsupported file types or mismatch between file types.")
        return 0
    # collect only non-empty paragraphs from each document
    paras1 = [p.text for p in Document(file1).paragraphs if p.text != '']
    paras2 = [p.text for p in Document(file2).paragraphs if p.text != '']
    if ignore_blanks:
        def collapse(paragraphs):
            return re.sub(r'\s+', ' ', '\n'.join(paragraphs)).strip()
        return 1 if collapse(paras1) == collapse(paras2) else 0
    if len(paras1) != len(paras2):
        return 0
    for left, right in zip(paras1, paras2):
        if left != right:
            return 0
    return 1
# Docx file saved in the ubuntu cannot use this function to compare highlight, don't know why, deprecated
def compare_highlighted_text(file1, file2):
if not file1 or not file2:
return 0
def extract_highlighted_text(file_path):
highlighted_texts = []
@@ -590,6 +708,9 @@ def compare_highlighted_text(file1, file2):
def compare_references(file1, file2, **options):
if not file1 or not file2:
return 0
reference_indicator = options.get('reference_indicator', 'References')
reference_base_result = options.get('reference_base_result', 0.5)
@@ -634,48 +755,3 @@ def compare_references(file1, file2, **options):
return (result - reference_base_result) / (1 - reference_base_result)
else:
return 0
def compare_answer(file1, file2, **options):
    """Compare the reference sections of two .docx documents with fuzzy matching.

    Finds the paragraph equal to the reference indicator in each document and
    fuzzily compares the non-empty paragraphs that follow it, pairwise.

    Options:
        reference_indicator (str, default 'References'): section heading text.
        reference_base_result (float, default 0.5): minimum average similarity
            mapped to score 0; similarities above it are rescaled to (0, 1].

    Returns:
        float|int: rescaled similarity score, 1 when neither document has the
        section, 0 on any structural mismatch.
    """
    if not file1 or not file2:
        return 0
    # Fix: these two values were used below but never read from `options`,
    # so the original body raised NameError on every call.
    reference_indicator = options.get('reference_indicator', 'References')
    reference_base_result = options.get('reference_base_result', 0.5)
    if file1.endswith('.docx') and file2.endswith('.docx'):
        doc1 = Document(file1)
        doc2 = Document(file2)
        doc1_paragraphs = [p.text for p in doc1.paragraphs]
        doc2_paragraphs = [p.text for p in doc2.paragraphs]
    else:
        # Unsupported file types or mismatch
        print("Unsupported file types or mismatch between file types.")
        return 0
    # Index of the last reference indicator paragraph (-1 when absent)
    ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1
    ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1
    if ref1_idx == -1 and ref2_idx == -1:
        return 1
    if ref1_idx == -1 or ref2_idx == -1:
        return 0
    # Split each reference section into non-empty items
    ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()]
    ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()]
    if len(ref1) != len(ref2):
        return 0
    if not ref1:
        # both sections empty -> trivially matching; also avoids dividing by zero
        return 1
    total_similarity = 0
    for r1, r2 in zip(ref1, ref2):
        # fuzzy match the references
        similarity = fuzz.ratio(r1, r2) / 100.0
        total_similarity += similarity
    result = total_similarity / len(ref1)
    if result >= reference_base_result:
        return (result - reference_base_result) / (1 - reference_base_result)
    else:
        return 0

View File

@@ -1,7 +1,9 @@
import csv
import datetime
import difflib
import functools
import json
import yaml
import logging
import operator
import re
import sqlite3
@@ -10,17 +12,19 @@ from typing import Callable, Any, Union
from typing import Dict, List, Pattern
import lxml.etree
import pandas as pd
import pdfplumber
import yaml
from docx import Document
from lxml.cssselect import CSSSelector
from lxml.etree import _Element
from rapidfuzz import fuzz
import difflib
from .utils import _match_record, _match_value_to_rule
import logging
logger = logging.getLogger("desktopenv.metric.general")
def check_include_exclude(result: str, rules: Dict[str, List[str]]) -> float:
if result is None:
return 0.
@@ -68,6 +72,7 @@ def is_in_list(result, rules) -> float:
else:
return 0.
def diff_text_file(result: str, expect: str) -> float:
if result is None:
return 0.
@@ -78,12 +83,32 @@ def diff_text_file(result: str, expect: str) -> float:
expected_lines: List[str] = f.read().splitlines()
return difflib.SequenceMatcher(a=result_lines, b=expected_lines).ratio()
def fuzzy_match(result, rules) -> float:
    """Fuzzy string similarity between `result` and rules['expected'], in [0, 1]."""
    expected = rules["expected"]
    return fuzz.ratio(result, expected) / 100.
def fuzzy_place_math(result_file_path, rules) -> float:
    """Score a .docx answer sheet against a list of acceptable answers.

    Every whitespace-separated word in the document is fuzzily matched against
    each candidate in rules['expected']; the word's score is its best match.

    Args:
        result_file_path: path to the .docx file to grade.
        rules: dict with key 'expected' — a list of acceptable answer strings.

    Returns:
        float: mean of the per-word best scores (0-100 scale, as fuzz.ratio).
    """
    expect = rules["expected"]  # a list of possible answers
    doc = Document(result_file_path)
    words_list = []
    for para in doc.paragraphs:
        words_list.extend(para.text.split())
    print("Your Answers: ")
    print(words_list)
    # Fix: an empty document previously raised ZeroDivisionError.
    if not words_list:
        return 0.
    fuzzy_score_list = []
    for word in words_list:
        # best similarity of this word against any acceptable answer
        max_score = max(fuzz.ratio(word, ans) for ans in expect)
        fuzzy_score_list.append(max_score)
    return sum(fuzzy_score_list) / len(fuzzy_score_list)
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
"""
Args:
@@ -191,10 +216,10 @@ def check_accessibility_tree(result: str, rules: List[Dict[str, Any]]) -> float:
return 0.
if "text" in r:
match_func: Callable[[str], Number] = functools.partial( operator.eq if r["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, r["text"]
)
match_func: Callable[[str], Number] = functools.partial(operator.eq if r["exact"] \
else (lambda a, b: fuzz.ratio(a, b) / 100.)
, r["text"]
)
match_score: Number = 0
for elm in elements:
match_score = max(match_score, match_func(elm.text or None))
@@ -267,20 +292,157 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
return float(metric)
def check_direct_json_object(result, rules) -> float:
    """Compare a (possibly stringified) JSON object against an expected object.

    One of the most commonly used evaluation functions. With
    rules['expect_in_result'] unset/False, every expected key must map to an
    equal value in `result`; when True, each expected value must be *contained
    in* (substring/membership of) the corresponding result value.

    Args:
        result: dict, or a string holding JSON (single quotes tolerated).
        rules: dict with 'expected' (the reference object) and optional
            'expect_in_result' flag.

    Returns:
        float: 1.0 on match, 0. otherwise.
    """
    if isinstance(result, str):
        # remove blanks before and after result
        result = result.strip()
        # tolerate single-quoted pseudo-JSON by swapping quotes
        # NOTE(review): this breaks values that legitimately contain quotes.
        result = result.replace("'", '"')
        result = json.loads(result)
    print("result: ")
    print(result)
    print("expected: ")
    print(rules["expected"])
    if result is None:
        return 0.
    # Fix: the block contained two comparison loops; the first (strict
    # equality) always returned before the expect_in_result variant could
    # run, making the containment mode dead code. Single loop, mode-switched.
    expect_in_result = rules.get("expect_in_result", False)
    expected_json = rules["expected"]
    for key in expected_json.keys():
        expected_value = expected_json.get(key)
        if not expect_in_result:
            if expected_value != result.get(key):
                return 0.
        else:
            if expected_value not in result.get(key):
                return 0.
    return 1.0
def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    """Check that every row of a speedtest CSV was recorded within `time_diff`
    minutes of the current time.

    The date column is the first one whose header starts with 'TEST_DATE';
    its values are assumed to end with an HH:MM timestamp (last 5 chars) —
    TODO confirm against the speedtest export format.

    Args:
        speedtest_result_path: path to the exported CSV file.
        time_diff: maximum allowed difference in minutes (str or int).

    Returns:
        int: 1 when all rows are recent enough, 0 otherwise.
    """
    if not speedtest_result_path:
        return 0
    date_col = None
    with open(speedtest_result_path, 'r') as f:
        reader = pd.read_csv(f)
    for column in reader.columns:
        if column.startswith('TEST_DATE'):
            date_col = column
            break
    # Fix: a missing TEST_DATE column previously raised KeyError(None).
    if date_col is None:
        return 0
    now_date_time = datetime.datetime.now().strftime('%H:%M')
    for date in reader[date_col]:
        date_time = date[-5:]
        # every timestamp must be within time_diff minutes of "now"
        if not abs((datetime.datetime.strptime(date_time, '%H:%M')
                    - datetime.datetime.strptime(now_date_time, '%H:%M')).total_seconds()) / 60 < int(time_diff):
            return 0
    return 1
def is_included_all_json_objects(gold_file_path, result_file_path):
    """Return 1 when every key/value pair of the gold JSON file also appears
    (with an equal value) in the result JSON file, else 0."""
    if not gold_file_path or not result_file_path:
        return 0
    print("gold_file_path: ")
    print(gold_file_path)
    print("result_file_path: ")
    print(result_file_path)
    with open(gold_file_path, 'r') as gold_fp:
        gold = json.load(gold_fp)
    with open(result_file_path, 'r') as result_fp:
        result = json.load(result_fp)
    matches = all(key in result and result[key] == value for key, value in gold.items())
    return 1 if matches else 0
def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
    """Return 1 when every value of the gold JSON file occurs somewhere in the
    PDF's extracted text, else 0 (printing the keys that were missing)."""
    print("gold_text_path: ")
    print(gold_text_path)
    print("pdf_file_path: ")
    print(pdf_file_path)
    with open(gold_text_path, 'r') as handle:
        gold_json = json.load(handle)
    # concatenate the text of every page
    with pdfplumber.open(pdf_file_path) as pdf:
        text = ''
        for page in pdf.pages:
            text += page.extract_text()
    missing_keys = [key for key in gold_json.keys() if gold_json[key] not in text]
    if missing_keys:
        print("false_list: ")
        print(missing_keys)
        return 0
    return 1
def file_contains(file_path, config):
    """Return 1 when every string in config['expected'] occurs in the text file.

    NOTE(review): an empty/missing path returns 1 (success) — presumably the
    file is optional for those tasks; confirm with callers.
    """
    if not file_path:
        return 1
    with open(file_path, 'r') as handle:
        body = handle.read()
    return 1 if all(fragment in body for fragment in config["expected"]) else 0
def check_csv_line_number(file_path, line_number):
    """Return 1 when the CSV file (including its header) has exactly
    line_number['expected'] rows; 0 for a non-.csv path or a count mismatch."""
    if not file_path.endswith('.csv'):
        return 0
    with open(file_path, 'r') as handle:
        total_rows = sum(1 for _ in csv.reader(handle))
    expected_rows = int(line_number["expected"])
    return 1 if total_rows == expected_rows else 0
def compare_terminal_and_txt(txt_file_path, terminal_output):
    """Return 1 when the terminal output is byte-identical to the text file's
    content, else 0."""
    with open(txt_file_path, 'r') as handle:
        expected_text = handle.read()
    return 1 if expected_text == terminal_output else 0
def compare_python_pure_text(py_file_path, gold_file_path):
    """Return 1 when the two source files are identical after removing ALL
    whitespace, else 0."""
    print("py_file_path: ")
    print(py_file_path)
    print("gold_file_path: ")
    print(gold_file_path)
    def _squash(text):
        # drop every whitespace character so only the tokens' bytes remain
        return ''.join(text.split())
    with open(py_file_path, 'r') as left:
        left_text = left.read()
    with open(gold_file_path, 'r') as right:
        right_text = right.read()
    return 1 if _squash(left_text) == _squash(right_text) else 0

View File

@@ -5,7 +5,7 @@ from PIL import Image, ImageChops, ImageStat
def compare_image_list(pred_img_path_list: Union[str, List[str]],
gold_img_path_list: Union[str, List[str]]) -> float:
gold_img_path_list: Union[str, List[str]]) -> float:
""" Compare two image lists, only if all images are the same, return 1.0, otherwise return 0.0
"""
if type(pred_img_path_list) != list:
@@ -177,6 +177,16 @@ def calculate_contrast(image):
return np.std(pixels)
def calculate_image_sharpness(image_path):
    """Estimate image sharpness as the variance of the Laplacian of the
    grayscale image (higher variance means more edges / sharper)."""
    grayscale = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    laplacian_response = cv2.Laplacian(grayscale, cv2.CV_64F)
    return np.var(laplacian_response)
def structure_check_by_mse(img1, img2, threshold=0.03):
"""Check if two images are approximately the same by MSE"""
mse = np.mean(
@@ -295,7 +305,8 @@ def check_triangle_position(tgt_path):
# We assume the triangle is a different color from the background
# Find the unique colors
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0, return_counts=True)
unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0,
return_counts=True)
unique_colors_sorted = unique_colors[np.argsort(counts)]
# Assuming the background is the most common color and the triangle is a different color
@@ -337,6 +348,25 @@ def check_structure_sim(src_path, tgt_path):
return structure_same
def check_structure_sim_resized(src_path, tgt_path):
    """
    Check whether two images share a similar structure once the source image
    is resized to the target's dimensions.
    gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
    """
    if src_path is None or tgt_path is None:
        return 0.
    target = Image.open(tgt_path)
    # Bring the source to the target's size so SSIM compares like with like.
    source = Image.open(src_path).resize(target.size)
    return structure_check_by_ssim(source, target)
def check_contrast_increase_and_structure_sim(src_path, tgt_path):
"""
Check if the src image has higher contrast than the tgt image and the structures are similar
@@ -388,34 +418,28 @@ def check_config_status(actual_config_path, rule):
return 0.
def check_image_size_and_structure_sim(src_path, tgt_path, height=512, width=None):
def check_image_size(src_path, rule):
"""
Check if the size of the src image is correct and the structure of the two images are similar.
gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15
Check if the size of the src image is correct
multi-apps:42f4d1c7-4521-4161-b646-0a8934e36081
"""
if src_path is None or tgt_path is None:
if src_path is None:
return 0.
# Load images
source_image = Image.open(src_path)
target_image = Image.open(tgt_path)
# Load the image
img = Image.open(src_path)
# Check size
if width is not None:
width_same = source_image.size[0] == width
else:
width_same = True
if height is not None:
height_same = source_image.size[1] == height
# Check the size
if rule["height"] is not None:
height_same = img.size[1] == rule["height"]
else:
height_same = True
if rule["width"] is not None:
width_same = img.size[0] == rule["width"]
else:
width_same = True
# Check structure
resized_target_image = target_image.resize(source_image.size)
structure_same = structure_check_by_ssim(source_image, resized_target_image)
if width_same and height_same and structure_same:
if height_same and width_same:
return 1.
else:
return 0.
@@ -521,6 +545,31 @@ def check_green_background(src_path, tgt_path):
return 1.
def check_sharper(src_path, tgt_path):
    """
    Check if the source image is sharper than the target image.
    multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
    """
    # Strictly greater Laplacian-variance score counts as "sharper";
    # an equal score does not.
    src_score = calculate_image_sharpness(src_path)
    tgt_score = calculate_image_sharpness(tgt_path)
    if src_score > tgt_score:
        return 1.0
    return 0.0
def check_image_file_size(src_path, rule):
    """
    Check that the file at src_path is smaller than rule["max_size"] bytes.
    Returns 1.0 when it is, 0.0 otherwise (including when src_path is None).
    """
    if src_path is None:
        return 0.0
    # Strictly-less-than comparison against the configured byte limit.
    return 1.0 if os.path.getsize(src_path) < rule["max_size"] else 0.0
if __name__ == "__main__":
actual_config_path = "../../../cache/sessionrc_test"
rule = {
@@ -550,3 +599,12 @@ if __name__ == "__main__":
tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png"
print(check_triangle_position(tgt_path))
src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi_sharper.png"
tgt_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi.png"
print(check_sharper(src_path, tgt_path))
src_path = "../../../cache/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f/compressed.jpeg"
rule = {
"max_size": 500000
}
print(check_image_file_size(src_path, rule))

View File

@@ -1,10 +1,12 @@
import builtins
import datetime
import functools
import itertools
import logging
import operator
import re
import zipfile
import pandas as pd
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
from typing import Dict, List, Set, Match, Tuple, Pattern
from urllib.parse import urlparse, urlunparse

View File

@@ -229,3 +229,54 @@ def check_python_file_by_test_suite(actual_files, test_file, **options) -> float
def check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float:
    """Compare produced Python files against a gold file.

    NOTE(review): stub — currently unimplemented; it falls through ``pass``
    and implicitly returns None despite the declared ``float`` return type.
    Callers must not rely on a float until this is filled in.
    """
    pass
def check_html_background_image(src_path: str, rule: Dict = None) -> float:
    """
    Check if the background image is correctly set.
    multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
    """
    from bs4 import BeautifulSoup
    with open(src_path, 'r') as html_file:
        soup = BeautifulSoup(html_file.read(), 'html.parser')
    # Same literal the original f-string produced: background-image: url('<value>')
    needle = "background-image: url('" + rule["value"] + "')"
    # A match in any <style> block counts as correctly configured.
    if any(needle in style.text for style in soup.find_all('style')):
        return 1.0
    return 0.0
def compare_result_files(src_path, tgt_path):
    """
    Compare whether the content of two files are the same.
    multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85

    The comparison is numeric when the contents parse as floats — with a
    small tolerance, and allowing the source to be a descriptive superset
    (language description + number) of the target — otherwise it falls back
    to an exact string comparison.

    Returns:
        1.0 when the contents match, 0.0 otherwise.
    """
    with open(src_path, 'r') as f:
        src_content = f.read().strip()
    with open(tgt_path, 'r') as f:
        tgt_content = f.read().strip()
    try:
        # Compare the content as numbers
        tgt_content_num = float(tgt_content)
        if tgt_content in src_content:
            # If the content of tgt is in src, return 1.0 since output src might be
            # a superset(language description+number) of tgt
            return 1.0
        src_content_num = float(src_content)
        if abs(src_content_num - tgt_content_num) < 1e-4:
            return 1.0
        return 0.0
    except ValueError:
        # float() raised: at least one side is not numeric.
        # Narrowed from a bare `except:` so real bugs (e.g. typos raising
        # NameError) are no longer silently swallowed.
        if src_content == tgt_content:
            return 1.0
        return 0.0
if __name__ == "__main__":
    # Ad-hoc manual check: verify the cached HTML sets the expected
    # background image.
    src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/index.html"
    rule = {
        # Fixed typo: the key was written as "type:" (stray trailing colon).
        "type": "value",
        "value": "anmi_sharper.png"
    }
    print(check_html_background_image(src_path, rule))