Files
sci-gui-agent-benchmark/desktop_env/evaluators/metrics/vscode.py
2024-03-05 22:46:56 +08:00

193 lines
4.7 KiB
Python

import copy
import importlib.util
import json
import sys
from typing import Dict
def check_json_keybindings(actual: str, expected: Dict, **options) -> float:
    """Score whether a keybindings JSON file contains an expected entry.

    The file is parsed up to twice: first as-is, then with its first line
    dropped (presumably to tolerate a leading comment line, which strict
    JSON rejects -- confirm against the files being checked). The first
    attempt that yields a JSON array is used.

    Args:
        actual (str): path to the result keybindings file
        expected (dict): rules dict; its "expected" key holds the entry that
            must be an element of the parsed array

    Return:
        float: 1.0 if the entry is in the array; 0.0 if it is not, if no
            path was given, or if the file cannot be read/parsed as a
            JSON array
    """
    if not actual:
        return 0.0

    def load_json(path, skip_lines):
        # Best-effort loader: any read or parse failure yields None so the
        # caller can fall through to the next strategy. Only ordinary
        # failures are swallowed, never KeyboardInterrupt/SystemExit.
        try:
            with open(path, 'r', encoding='utf-8') as f:
                for _ in range(skip_lines):
                    f.readline()
                return json.load(f)
        except (OSError, TypeError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return None

    for skip_lines in (0, 1):
        data = load_json(actual, skip_lines)
        if isinstance(data, list):
            break
    else:
        # Neither strategy produced a JSON array.
        return 0.0

    return 1.0 if expected['expected'] in data else 0.0
def check_json_settings(actual: str, expected: Dict, **options) -> float:
    """Score whether a settings JSON file contains all expected settings.

    Args:
        actual (str): path to the result settings file
        expected (dict): rules dict; its "expected" key holds a dict of
            setting names to the values they must have

    Return:
        float: 1.0 if every expected key is present in the file's top-level
            object with an equal value; 0.0 otherwise, including when no
            path was given or the file cannot be read or is not a JSON object
    """
    if not actual:
        return 0.0

    try:
        with open(actual, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # Unreadable or malformed result file (ValueError covers
        # json.JSONDecodeError): a failed result, not an evaluator crash.
        return 0.0

    if not isinstance(data, dict):
        return 0.0

    expect = expected['expected']
    # Sentinel distinguishes "key absent" from "key present with value None".
    missing = object()
    matched = all(
        data.get(key, missing) == value for key, value in expect.items()
    )
    return 1.0 if matched else 0.0
def compare_text_file(actual: str, expected: str, **options) -> float:
    """Score whether two text files have exactly the same content.

    Both files are opened in text mode with the platform default encoding
    and compared as whole strings.

    Args:
        actual (str): path to result text file
        expected (str): path to gold text file

    Return:
        float: 1.0 if the contents are equal; 0.0 if they differ or no
            result path was given
    """
    if not actual:
        return 0.0

    with open(actual) as result_file:
        actual_text = result_file.read()
    with open(expected) as gold_file:
        expected_text = gold_file.read()

    return 1.0 if actual_text == expected_text else 0.0
import zipfile
def compare_zip_files(actual: str, expected: str, **options) -> float:
    """Score whether two zip archives hold the same members and contents.

    Args:
        actual (str): path to result zip file
        expected (str): path to gold zip file

    Return:
        float: 1.0 if both archives list the same set of member names and
            every member's bytes are equal; 0.0 otherwise, including when
            no result path was given or the result archive cannot be opened
    """
    if not actual:
        return 0.0

    try:
        result_zip = zipfile.ZipFile(actual, 'r')
    except (OSError, zipfile.BadZipFile):
        # A missing or corrupt result archive is a failed result. Problems
        # with the gold archive below are deliberately left to raise.
        return 0.0

    with result_zip, zipfile.ZipFile(expected, 'r') as gold_zip:
        member_names = set(result_zip.namelist())
        if member_names != set(gold_zip.namelist()):
            return 0.0

        for name in member_names:
            if result_zip.read(name) != gold_zip.read(name):
                return 0.0
    return 1.0
def compare_config(actual: str, rules: Dict, **options) -> float:
    """Score whether a file's text equals an expected string.

    Args:
        actual (str): path to the result file, read in text mode
        rules (dict): rules dict; its "expected" key holds the exact text
            the file must contain

    Return:
        float: 1.0 if the file content equals rules["expected"]; 0.0 if it
            differs or no result path was given
    """
    if not actual:
        return 0.0

    with open(actual) as result_file:
        actual_text = result_file.read()

    return 1.0 if actual_text == rules['expected'] else 0.0
def compare_answer(actual: str, rules: Dict, **options) -> float:
    """Score whether a result string exactly equals the expected answer.

    Args:
        actual (str): result string
        rules (dict): rules dict; its "expected" key holds the gold string

    Return:
        float: 1.0 on an exact match; 0.0 if the strings differ or the
            result is empty/None
    """
    if not actual:
        return 0.0

    # TODO: can use text embedding to get non-zero return
    return 1.0 if actual == rules['expected'] else 0.0
def is_extension_installed(actual: str, rules: Dict, **options) -> float:
    """Score a containment rule against a result string.

    Args:
        actual (str): result string to search (presumably the output of an
            extension listing, going by the function name -- confirm with
            the caller)
        rules (dict): rules dict with two keys:
            "type": either "contain" or "not_contain"
            "expected": the value tested for membership in ``actual``

    Return:
        float: for "contain", 1.0 if rules["expected"] is in ``actual``;
            for "not_contain", 1.0 if it is not; 0.0 otherwise

    Raises:
        NotImplementedError: if rules["type"] is any other value
    """
    rule_type = rules['type']

    if rule_type == 'contain':
        return 1.0 if rules['expected'] in actual else 0.0
    if rule_type == 'not_contain':
        return 1.0 if rules['expected'] not in actual else 0.0

    raise NotImplementedError(
        "Unsupported rule type: {!r}".format(rule_type)
    )
def check_python_file_by_test_suite(actual_files, test_file, **options) -> float:
    """Check the python file by running the test suite in the given test file.

    The test file is loaded as a module and one function from it is called
    with no arguments; a truthy return value counts as a pass.

    Args:
        actual_files: not used by this function
        test_file (str): path to the Python file that defines the test
            function
        options: may contain "test_function_name", the name of the function
            to call (defaults to "test")

    Return:
        float: 1.0 if the test function returns a truthy value; 0.0 if it
            returns a falsy value, raises, is missing, or the test file
            cannot be loaded
    """
    import uuid  # local import: nothing else in this module needs it

    test_function_name = options.get('test_function_name', 'test')

    # The module name must be unique in the current runtime environment, so
    # generate a fresh one per call instead of reusing a constant.
    module_name = 'dynamic_module_' + uuid.uuid4().hex

    try:
        # Load the module from the given file path
        spec = importlib.util.spec_from_file_location(module_name, test_file)
        if spec is None or spec.loader is None:
            # No loader is available for this path.
            return 0.0
        module = importlib.util.module_from_spec(spec)
        sys.modules[module_name] = module  # Add the loaded module to sys.modules
        spec.loader.exec_module(module)  # Execute the module to make its content available

        # Retrieve the function by name from the loaded module and execute it
        test_function = getattr(module, test_function_name)
        return 1.0 if test_function() else 0.0
    except Exception:
        # This runs arbitrary test code: any failure while importing or
        # running it is a failed check rather than an evaluator crash.
        return 0.0
    finally:
        # Do not leave the throwaway module registered after the check.
        sys.modules.pop(module_name, None)
def check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float:
    """Placeholder for checking python files against a gold file.

    Not implemented: the body does nothing, so the call returns None rather
    than a float score and no argument is inspected.
    """
    # TODO: implement the comparison against the gold file.
    pass