sci-gui-agent-benchmark/desktop_env/evaluators/metrics/vscode.py

import copy
import importlib.util
import json
import sys
from typing import Dict


def check_json_keybindings(actual: str, expected: str, **options) -> float:
    """
    Args:
        actual (str): path to result text file
        expected (str): expected dict{}

    Return:
        float: the score
    """

    def direct_load_json(fp):
        try:
            with open(fp, 'r') as f:
                data = json.load(f)
            return data
        except:
            return None

    def skip_first_line_load_json(fp):
        try:
            with open(fp, 'r') as f:
                f.readline()
                data = json.load(f)
            return data
        except:
            return None

    for func in [direct_load_json, skip_first_line_load_json]:
        data = func(actual)
        if data is not None and type(data) == list:
            break
    else:
        return 0.0
    expected = expected['expected']
    if expected in data:
        return 1.0
    else:
        return 0.0


def check_json_settings(actual: str, expected: str, **options) -> float:
    """
    Args:
        actual (str): path to result text file
        expected (dict): expected dict{}, containing key "expect"

    Return:
        float: the score
    """
    if not actual:
        return 0.

    with open(actual, 'r') as f:
        data = json.load(f)

    expect = expected['expected']
    data_copy = copy.deepcopy(data)
    data_copy.update(expect)
    if data == data_copy:
        return 1.0
    else:
        return 0.0


def compare_text_file(actual: str, expected: str, **options) -> float:
    """
    Args:
        actual (str): path to result text file
        expected (str): path to gold text file

    Return:
        float: the score
    """
    if not actual:
        return 0.

    with open(actual) as f1:
        actual_text = f1.read()
    with open(expected) as f2:
        expected_text = f2.read()

    if actual_text == expected_text:
        return 1.0
    return 0.0

import zipfile

def compare_zip_files(actual: str, expected: str, **options) -> float:
    """
    Args:
        actual (str): path to result zip file
        expected (str): path to gold zip file

    Return:
        float: the score
    """
    if not actual:
        return 0.

    with zipfile.ZipFile(actual, 'r') as zip_file1, zipfile.ZipFile(expected, 'r') as zip_file2:
        file_list1 = set(zip_file1.namelist())
        file_list2 = set(zip_file2.namelist())

        if file_list1 != file_list2:
            return 0.0

        for file_name in file_list1:
            content1 = zip_file1.read(file_name)
            content2 = zip_file2.read(file_name)

            if content1 != content2:
                return 0.0
    return 1.0


def compare_config(actual: str, rules: Dict, **options) -> float:
    if not actual:
        return 0.

    with open(actual) as f1:
        actual_text = f1.read()

    if actual_text == rules['expected']:
        return 1.0
    return 0.0


def compare_answer(actual: str, rules: Dict, **options) -> float:
    """
    Args:
        actual (str): result string
        expected (str): gold string

    Return:
        float: the score
    """
    if not actual:
        return 0.

    if actual == rules['expected']:
        return 1.0

    # TODO: can use text embedding to get non-zero return
    return 0.0


def is_extension_installed(actual: str, rules: Dict, **options):
    if rules['type'] == 'contain':
        if rules['expected'] in actual:
            return 1.0
        return 0.0
    elif rules['type'] == 'not_contain':
        if rules['expected'] not in actual:
            return 1.0
        return 0.0
    else:
        raise NotImplementedError


def check_python_file_by_test_suite(actual_files, test_file, **options) -> float:
    """Check the python file by running the test suite in the given test file."""

    test_function_name = options.get('test_function_name', 'test')
    # Create a unique module name, it can be arbitrary but must be unique in the current runtime environment
    module_name = 'dynamic_module'

    # Load the module from the given file path
    spec = importlib.util.spec_from_file_location(module_name, test_file)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module  # Add the loaded module to sys.modules
    spec.loader.exec_module(module)  # Execute the module to make its content available

    # Retrieve the function by name from the loaded module and execute it
    test_function = getattr(module, test_function_name)
    try:
        if test_function():
            return 1.0
        else:
            return 0.0
    except Exception as e:
        return 0.0


def check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float:
    pass