def check_python_file_by_test_suite(actual_files, test_file, **options) -> float:
    """Score a submission by running one function from a test-suite file.

    The file at ``test_file`` is loaded as a throw-away module and the
    function named by ``options['test_function_name']`` is called with no
    arguments.

    Args:
        actual_files: Accepted for interface compatibility; this function
            does not read it.
        test_file: Path of the Python file that defines the test function.
        **options: ``test_function_name`` (default ``'test'``) selects the
            function to call inside the loaded module.

    Returns:
        1.0 if the test function returns a truthy value. 0.0 if it returns a
        falsy value, raises, is missing or not callable, or if the test file
        cannot be loaded or fails while being imported.
    """
    import logging
    import uuid

    log = logging.getLogger(__name__)
    test_function_name = options.get('test_function_name', 'test')

    # A fresh name per call keeps repeated or concurrent evaluations from
    # overwriting each other's entry in sys.modules.
    module_name = f'dynamic_module_{uuid.uuid4().hex}'

    # NOTE(review): the directory of test_file is not added to sys.path here;
    # a suite that imports sibling modules relies on the caller's import
    # path / working directory -- confirm against the evaluation harness.
    try:
        spec = importlib.util.spec_from_file_location(module_name, test_file)
        if spec is None or spec.loader is None:
            log.error("Cannot build an import spec for test file %r", test_file)
            return 0.0

        module = importlib.util.module_from_spec(spec)
        # Registered while the module body runs so that code which looks the
        # module up by name during import can find it.
        sys.modules[module_name] = module
        spec.loader.exec_module(module)

        test_function = getattr(module, test_function_name, None)
        if not callable(test_function):
            log.error(
                "Test file %r has no callable named %r",
                test_file, test_function_name,
            )
            return 0.0

        return 1.0 if test_function() else 0.0
    except Exception:
        # Any failure while loading or running the suite counts as a failed
        # check, but the traceback is logged so it is not lost silently.
        log.exception(
            "Running %r from test file %r failed", test_function_name, test_file
        )
        return 0.0
    finally:
        # Never leave the temporary module behind for later evaluations.
        sys.modules.pop(module_name, None)
check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float: pass diff --git a/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json b/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json index ce255ca..800f1e7 100644 --- a/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json +++ b/evaluation_examples/examples/multi_apps/26150609-0da3-4a7d-8868-0faf9c5f01bb.json @@ -1,6 +1,6 @@ { "id": "26150609-0da3-4a7d-8868-0faf9c5f01bb", - "snapshot": "libreoffice_calc", + "snapshot": "vs_code", "instruction": "So, I've been dabbling with coding a Snake game in Python, and I finally got it up and running. It's pretty cool, but it's not without its quirks. The biggest issue I'm facing right now is that the snake can't seem to eat the food, no matter what. Could you help me tweak the code so the snake can actually eat the food? Thanks a bunch!", "source": "authors", "config": [ @@ -14,6 +14,16 @@ ] } }, + { + "type": "command", + "parameters": { + "command": [ + "pip", + "install", + "pygame" + ] + } + }, { "type": "download", "parameters": { @@ -54,12 +64,27 @@ ], "evaluator": { "postconfig": [], - "func": "", + "func": "check_python_file_by_test_suite", "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/snake/food.py", + "/home/user/Desktop/snake/main.py", + "/home/user/Desktop/snake/settings.py", + "/home/user/Desktop/snake/snake.py" + ], + "dest": [ + "food.py", + "main.py", + "settings.py", + "snake.py" + ], + "multi": true }, "expected": { - }, - "options": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1rLhp5cAS93uwCdZlikQse656Tf1Ut0Xc&export=download&authuser=0&confirm=t&uuid=6bb19b05-6832-4093-9441-634b9d0ba476&at=APZUnTVBJ1h-GgRmXMW_iagvZV1A:1709447052264", + "dest": "test_suite.py" } } } diff --git a/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json 
b/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json index 62b1c30..d3dbef2 100644 --- a/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json +++ b/evaluation_examples/examples/multi_apps/9219480b-3aed-47fc-8bac-d2cffc5849f7.json @@ -1,7 +1,7 @@ { "id": "9219480b-3aed-47fc-8bac-d2cffc5849f7", - "snapshot": "libreoffice_calc", - "instruction": "Hi, I recently playing with developing a small python-based teris game. While I have finished most of the part, something is wrong under some cases when I press up to rotate, the whole program will crash, please run the code for me and fix the bugs of code.", + "snapshot": "vs_code", + "instruction": "Hi, I recently playing with developing a small python-based tetris game. While I have finished most of the part, something is wrong under some cases when I press up to rotate, the whole program will crash, please run the code for me and fix the bugs of code.", "source": "authors", "config": [ { @@ -10,7 +10,7 @@ "command": [ "mkdir", "-p", - "/home/user/Desktop/teris" + "/home/user/Desktop/tetris" ] } }, @@ -19,19 +19,19 @@ "parameters": { "files": [ { - "path": "/home/user/Desktop/teris/block.py", + "path": "/home/user/Desktop/tetris/block.py", "url": "https://drive.usercontent.google.com/download?id=1txPwaWML0e8cjpDW-nw5N6HOC6fYwfI1&export=download&authuser=0&confirm=t&uuid=0b69795a-3600-4ec4-be9b-533deeb76e94&at=APZUnTVK-VGsZfTufLSh_3eRigYb:1709303077428" }, { - "path": "/home/user/Desktop/teris/main.py", + "path": "/home/user/Desktop/tetris/main.py", "url": "https://drive.usercontent.google.com/download?id=1vJ2FEw7RNfRr0KBynHM36_nNAc4jTArl&export=download&authuser=0&confirm=t&uuid=11f697bc-1414-46a0-bc2d-b2c49686d85e&at=APZUnTXCtKDpqTAOstIioRrJzx67:1709303181059" }, { - "path": "/home/user/Desktop/teris/settings.py", + "path": "/home/user/Desktop/tetris/settings.py", "url": 
"https://drive.usercontent.google.com/download?id=1tYI8QZz-T-sNFRAMaYnzFv80upIveUT-&export=download&authuser=0&confirm=t&uuid=50c64eed-ab7e-4dbe-82ce-d01b25364556&at=APZUnTXUok8Cos6rUo-fnKLwnPmn:1709303184763" }, { - "path": "/home/user/Desktop/teris/teris.py", + "path": "/home/user/Desktop/tetris/tetris.py", "url": "https://drive.usercontent.google.com/download?id=1UOJp5Y6JLmaXmZXHc_GkM3nMfKbS6Dyu&export=download&authuser=0&confirm=t&uuid=3189e5d6-5984-45c7-9dd8-6f8ec3a0845e&at=APZUnTWYTtsu27Ds3aHrZpIAnNcA:1709303187967" } ] @@ -42,7 +42,7 @@ "parameters": { "command": [ "code", - "/home/user/Desktop/teris" + "/home/user/Desktop/tetris" ] } } @@ -53,16 +53,28 @@ "os" ], "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - - } - } + "postconfig": [], + "func": "check_python_file_by_test_suite", + "result": { + "type": "vm_file", + "path": [ + "/home/user/Desktop/tetris/block.py", + "/home/user/Desktop/tetris/main.py", + "/home/user/Desktop/tetris/settings.py", + "/home/user/Desktop/tetris/tetris.py" + ], + "dest": [ + "block.py", + "main.py", + "settings.py", + "tetris.py" + ], + "multi": true + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1eDEWLCswia1ihoh5kx5GGGDmFLt6GH_A&export=download&authuser=0&confirm=t&uuid=30330a2f-ae74-4299-bf22-0d8a6f814e1f&at=APZUnTUbxsypC6Blx3nJPFfCiaeV:1709449044563", + "dest": "test_suite.py" + } + } }