CoACT initialize (#292)
This commit is contained in:
596
mm_agents/coact/autogen/code_utils.py
Normal file
596
mm_agents/coact/autogen/code_utils.py
Normal file
@@ -0,0 +1,596 @@
|
||||
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
|
||||
# SPDX-License-Identifier: MIT
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
import re
|
||||
import string
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import venv
|
||||
from concurrent.futures import ThreadPoolExecutor, TimeoutError
|
||||
from hashlib import md5
|
||||
from types import SimpleNamespace
|
||||
from typing import Callable, Optional, Union
|
||||
|
||||
import docker
|
||||
|
||||
from .types import UserMessageImageContentPart, UserMessageTextContentPart
|
||||
|
||||
# Unique marker used as a default argument value so that "argument not passed"
# can be told apart from an explicitly supplied value (see `execute_code`).
SENTINEL = object()
DEFAULT_MODEL = "gpt-4"
FAST_MODEL = "gpt-3.5-turbo"
# Regular expression for finding a code block
# ```[ \t]*(\w+)?[ \t]*\r?\n(.*?)\r?\n[ \t]*``` Matches multi-line code blocks.
#   The [ \t]* matches the potential spaces before language name.
#   The (\w+)? matches the language, where the ? indicates it is optional.
#   The [ \t]* matches the potential spaces (not newlines) after language name.
#   The \r?\n makes sure there is a linebreak after ```.
#   The (.*?) matches the code itself (non-greedy).
#   The \r?\n makes sure there is a linebreak before ```.
#   The [ \t]* matches the potential spaces before closing ``` (the spec allows indentation).
# NOTE: `.` only spans several lines when the pattern is used with re.DOTALL.
CODE_BLOCK_PATTERN = r"```[ \t]*(\w+)?[ \t]*\r?\n(.*?)\r?\n[ \t]*```"
# Default working directory: the "extensions" folder next to this module.
WORKING_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "extensions")
# Language label used when no language could be determined.
UNKNOWN = "unknown"
# Log text returned by `execute_code` when the run exceeds its timeout.
TIMEOUT_MSG = "Timeout"
# Default execution timeout, in seconds (see the `execute_code` docstring).
DEFAULT_TIMEOUT = 600
WIN32 = sys.platform == "win32"
# Evaluates to "\\" on Windows and "/" everywhere else.
PATH_SEPARATOR = (WIN32 and "\\") or "/"
# Language tags that `_cmd` maps to the "python" executable.
PYTHON_VARIANTS = ["python", "Python", "py"]

logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def content_str(content: Union[str, list[Union[UserMessageTextContentPart, UserMessageImageContentPart]], None]) -> str:
    """Flatten the `content` field of an OpenAI-style message into a single string.

    Args:
        content: Either None, a plain string, or a list of dicts. Each dict
            must carry a "type" key whose value is "text", "input_image" or
            "image_url".

    Returns:
        str: "" for None, the string itself for a string, and for a list the
        concatenation of every "text" part with each image part replaced by
        the placeholder token "<image>".

    Raises:
        TypeError: If `content` is not None/str/list, or a list element is not a dict.
        AssertionError: If a list element has no "type" key.
        ValueError: If a list element has an unsupported "type".
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        raise TypeError(f"content must be None, str, or list, but got {type(content)}")

    flattened = ""
    for part in content:
        if not isinstance(part, dict):
            raise TypeError("Wrong content format: every element should be dict if the content is a list.")
        assert "type" in part, "Wrong content format. Missing 'type' key in content's dict."

        part_type = part["type"]
        if part_type == "text":
            flattened += part["text"]
            continue
        if part_type in ("input_image", "image_url"):
            # Images cannot be rendered as text; emit a placeholder token instead.
            flattened += "<image>"
            continue
        raise ValueError(f"Wrong content format: unknown type {part_type} within the content")
    return flattened
|
||||
|
||||
|
||||
def infer_lang(code: str) -> str:
    """Infer the language of a code snippet.

    TODO: make it robust. The prefix check is a plain string match, so Python
    source that merely starts with "pip" (e.g. `pipeline = 1`) is also
    reported as "sh".

    Args:
        code: The snippet to classify.

    Returns:
        str: "sh" when the snippet starts with "python ", "python3 " or "pip";
        "python" when it compiles as Python source; otherwise UNKNOWN.
    """
    # Command-line invocations of python/pip are shell snippets, not Python code.
    if code.startswith(("python ", "python3 ", "pip")):
        return "sh"

    # Anything that compiles is treated as Python.
    try:
        compile(code, "test", "exec")
    except (SyntaxError, ValueError):
        # SyntaxError: not valid Python. ValueError: compile() rejects source
        # containing null bytes this way on some interpreter versions.
        return UNKNOWN
    return "python"
|
||||
|
||||
|
||||
# TODO: In the future, move to better support for https://spec.commonmark.org/0.30/#fenced-code-blocks,
# perhaps by using a full Markdown parser.
|
||||
def extract_code(
    text: Union[str, list], pattern: str = CODE_BLOCK_PATTERN, detect_single_line_code: bool = False
) -> list[tuple[str, str]]:
    """Extract code from a text.

    Args:
        text (str or List): The content to extract code from. The content can be
            a string or a list, as returned by standard GPT or multimodal GPT.
        pattern (str, optional): The regular expression pattern for finding the
            code block. Defaults to CODE_BLOCK_PATTERN. Only used when
            `detect_single_line_code` is False.
        detect_single_line_code (bool, optional): Also extract inline code
            wrapped in single backticks. Defaults to False.

    Returns:
        list: A list of tuples, each containing the language and the code.
          With `detect_single_line_code=False`:
            If there is no code block in the input text, the single entry has
            the language "unknown" and the whole text as code.
            If there is code block but the language is not specified, the language would be "".
          With `detect_single_line_code=True`:
            Fenced blocks and inline code are returned stripped; inline code
            has the language "". The list is empty when nothing matches.
    """
    text = content_str(text)
    if not detect_single_line_code:
        match = re.findall(pattern, text, flags=re.DOTALL)
        return match if match else [(UNKNOWN, text)]

    # Extract both multi-line and single-line code block, separated by the | operator
    # `([^`]+)`: Matches inline code.
    # re.DOTALL is required so that `(.*?)` in CODE_BLOCK_PATTERN can span several
    # lines; without it a multi-line fenced block falls through to the inline
    # alternative and comes back with its language tag glued onto the code.
    code_pattern = re.compile(CODE_BLOCK_PATTERN + r"|`([^`]+)`", flags=re.DOTALL)

    # Each match is (language, fenced body, inline body); exactly one body is non-empty.
    extracted = []
    for lang, fenced, inline in code_pattern.findall(text):
        if fenced:
            extracted.append((lang.strip(), fenced.strip()))
        elif inline:
            extracted.append(("", inline.strip()))

    return extracted
|
||||
|
||||
|
||||
def timeout_handler(signum, frame):
    """Abort the current operation by raising `TimeoutError`.

    The `(signum, frame)` signature is the one `signal.signal` passes to its
    handlers; neither argument is used.

    Raises:
        TimeoutError: Always, with the message "Timed out!".
    """
    message = "Timed out!"
    raise TimeoutError(message)
|
||||
|
||||
|
||||
def get_powershell_command():
    """Return the name of the PowerShell executable available on this system.

    "powershell" is probed first, then "pwsh". A candidate is accepted when
    running it to print `$PSVersionTable.PSVersion.Major` exits with status 0.

    Returns:
        str: "powershell" or "pwsh".

    Raises:
        FileNotFoundError: If no candidate could be started successfully.
        PermissionError: If a candidate exists but may not be executed.
    """
    probes = (
        ("powershell", ["powershell", "$PSVersionTable.PSVersion.Major"]),
        ("pwsh", ["pwsh", "-Command", "$PSVersionTable.PSVersion.Major"]),
    )

    launch_error = None
    for executable, argv in probes:
        try:
            result = subprocess.run(argv, capture_output=True, text=True)
        except (FileNotFoundError, NotADirectoryError) as e:
            # The executable is not on PATH (or PATH contains a bad entry); try the next one.
            launch_error = e
            continue
        except PermissionError as e:
            raise PermissionError("No permission to run powershell.") from e
        if result.returncode == 0:
            return executable

    # Nothing usable was found: fail loudly instead of returning None, which
    # would only surface later as an obscure error when the command is run.
    raise FileNotFoundError(
        "Neither powershell.exe nor pwsh.exe is present in the system. "
        "Please install PowerShell and try again. "
    ) from launch_error
|
||||
|
||||
|
||||
def _cmd(lang: str) -> str:
    """Translate a language tag into the command used to run code in that language.

    Args:
        lang: Language tag, e.g. "python", "sh", "javascript" or "ps1".

    Returns:
        str: "python" for any tag in PYTHON_VARIANTS; the tag itself for tags
        starting with "python" and for "bash"/"sh"; "sh" for "shell"; "node"
        for "javascript"; the result of `get_powershell_command()` for
        "ps1"/"pwsh"/"powershell".

    Raises:
        NotImplementedError: If the tag is not recognized.
    """
    if lang in PYTHON_VARIANTS:
        return "python"

    # Tags that already name their own interpreter are passed through.
    names_its_interpreter = lang.startswith("python") or lang in ("bash", "sh")
    if names_its_interpreter:
        return lang

    fixed_commands = {"shell": "sh", "javascript": "node"}
    if lang in fixed_commands:
        return fixed_commands[lang]

    if lang in ("ps1", "pwsh", "powershell"):
        return get_powershell_command()

    raise NotImplementedError(f"{lang} not recognized in code execution")
|
||||
|
||||
|
||||
def is_docker_running() -> bool:
    """Check if docker is running.

    A client is created from the environment and the daemon is pinged. The
    client is closed again before returning so that repeated checks do not
    leak connections.

    Returns:
        bool: True if docker is running; False otherwise.
    """
    try:
        client = docker.from_env()
    except docker.errors.DockerException:
        return False

    try:
        client.ping()
        return True
    except docker.errors.DockerException:
        return False
    finally:
        # Release the underlying HTTP session regardless of the outcome.
        client.close()
|
||||
|
||||
|
||||
def in_docker_container() -> bool:
    """Tell whether the current process runs inside a docker container.

    The check looks for the marker file "/.dockerenv".

    Returns:
        bool: True if the marker file exists; False otherwise.
    """
    docker_marker = pathlib.Path("/.dockerenv")
    return docker_marker.exists()
|
||||
|
||||
|
||||
def decide_use_docker(use_docker: Optional[bool]) -> Optional[bool]:
    """Resolve the docker setting, falling back to the AUTOGEN_USE_DOCKER environment variable.

    Args:
        use_docker: The explicit setting. Any value other than None is
            returned unchanged.

    Returns:
        Optional[bool]: `use_docker` itself when it is not None. Otherwise the
        value of AUTOGEN_USE_DOCKER (default "True"), matched
        case-insensitively: "1"/"true"/"yes"/"t" give True,
        "0"/"false"/"no"/"f" give False and "none" gives None.

    Raises:
        ValueError: If AUTOGEN_USE_DOCKER holds an unrecognized value.
    """
    if use_docker is not None:
        return use_docker

    raw_setting = os.environ.get("AUTOGEN_USE_DOCKER", "True")

    # Lookup table keyed by the lower-cased environment value.
    recognized = {
        "1": True,
        "true": True,
        "yes": True,
        "t": True,
        "0": False,
        "false": False,
        "no": False,
        "f": False,
        "none": None,  # Special case for 'None' as a string
    }
    normalized = raw_setting.lower()
    if normalized not in recognized:
        raise ValueError(
            f'Invalid value for AUTOGEN_USE_DOCKER: {raw_setting}. Please set AUTOGEN_USE_DOCKER to "1/True/yes", "0/False/no", or "None".'
        )
    return recognized[normalized]
|
||||
|
||||
|
||||
def check_can_use_docker_or_throw(use_docker) -> None:
    """Raise if docker execution was requested but docker is unavailable.

    Nothing is checked when `use_docker` is None or otherwise falsy. The
    docker daemon is only contacted when the request cannot be satisfied by
    already running inside a container, so native runs never touch docker.

    Args:
        use_docker: The resolved docker setting (None, a bool, an image name
            or a list of image names).

    Raises:
        RuntimeError: If docker is requested, the process is not inside a
            container and the docker daemon is not running.
    """
    if not use_docker:
        return
    # Short-circuit: the (comparatively expensive) daemon ping only runs when needed.
    if in_docker_container() or is_docker_running():
        return
    raise RuntimeError(
        "Code execution is set to be run in docker (default behaviour) but docker is not running.\n"
        "The options available are:\n"
        "- Make sure docker is running (advised approach for code execution)\n"
        '- Set "use_docker": False in code_execution_config\n'
        '- Set AUTOGEN_USE_DOCKER to "0/False/no" in your environment variables'
    )
|
||||
|
||||
|
||||
def _sanitize_filename_for_docker_tag(filename: str) -> str:
|
||||
"""Convert a filename to a valid docker tag.
|
||||
See https://docs.docker.com/engine/reference/commandline/tag/ for valid tag
|
||||
format.
|
||||
|
||||
Args:
|
||||
filename (str): The filename to be converted.
|
||||
|
||||
Returns:
|
||||
str: The sanitized Docker tag.
|
||||
"""
|
||||
# Replace any character not allowed with an underscore
|
||||
allowed_chars = set(string.ascii_letters + string.digits + "_.-")
|
||||
sanitized = "".join(char if char in allowed_chars else "_" for char in filename)
|
||||
|
||||
# Ensure it does not start with a period or a dash
|
||||
if sanitized.startswith(".") or sanitized.startswith("-"):
|
||||
sanitized = "_" + sanitized[1:]
|
||||
|
||||
# Truncate if longer than 128 characters
|
||||
return sanitized[:128]
|
||||
|
||||
|
||||
def execute_code(
    code: Optional[str] = None,
    timeout: Optional[int] = None,
    filename: Optional[str] = None,
    work_dir: Optional[str] = None,
    use_docker: Union[list[str], str, bool] = SENTINEL,
    lang: Optional[str] = "python",
) -> tuple[int, str, Optional[str]]:
    """Execute code in a docker container.
    This function is not tested on MacOS.

    Args:
        code (Optional, str): The code to execute.
            If None, the code from the file specified by filename will be executed.
            Either code or filename must be provided.
        timeout (Optional, int): The maximum execution time in seconds.
            If None, a default timeout will be used. The default timeout is 600 seconds.
            When the code runs natively (no docker), the child process is killed once the timeout expires.
        filename (Optional, str): The file name to save the code or where the code is stored when `code` is None.
            If None, a file with a randomly generated name will be created.
            The randomly generated file will be deleted after execution.
            The file name must be a relative path. Relative paths are relative to the working directory.
        work_dir (Optional, str): The working directory for the code execution.
            If None, a default working directory will be used.
            The default working directory is the "extensions" directory under
            "path_to_autogen".
        use_docker (list, str or bool): The docker image to use for code execution.
            Default is True, which means the code will be executed in a docker container. A default list of images will be used.
            If a list or a str of image name(s) is provided, the code will be executed in a docker container
            with the first image successfully pulled.
            If False, the code will be executed in the current environment.
            Expected behaviour:
                - If `use_docker` is not set (i.e. left default to True) or is explicitly set to True and the docker package is available, the code will run in a Docker container.
                - If `use_docker` is not set (i.e. left default to True) or is explicitly set to True but the Docker package is missing or docker isn't running, an error will be raised.
                - If `use_docker` is explicitly set to False, the code will run natively.
            If the code is executed in the current environment,
            the code must be trusted.
        lang (Optional, str): The language of the code. Default is "python".

    Returns:
        int: 0 if the code executes successfully.
        str: The error message if the code fails to execute; the stdout otherwise.
        image: The docker image name after container run when docker is used.

    Raises:
        AssertionError: If neither `code` nor `filename` is provided.
        RuntimeError: If docker is requested but not running.
    """
    if all((code is None, filename is None)):
        error_msg = f"Either {code=} or {filename=} must be provided."
        logger.error(error_msg)
        raise AssertionError(error_msg)

    running_inside_docker = in_docker_container()

    # SENTINEL is used to indicate that the user did not explicitly set the argument
    if use_docker is SENTINEL:
        use_docker = decide_use_docker(use_docker=None)
    check_can_use_docker_or_throw(use_docker)

    timeout = timeout or DEFAULT_TIMEOUT
    original_filename = filename
    if WIN32 and lang in ["sh", "shell"] and (not use_docker):
        lang = "ps1"
    if filename is None:
        code_hash = md5(code.encode()).hexdigest()
        # create a file with a automatically generated name
        filename = f"tmp_code_{code_hash}.{'py' if lang.startswith('python') else lang}"
    if work_dir is None:
        work_dir = WORKING_DIR

    filepath = os.path.join(work_dir, filename)
    file_dir = os.path.dirname(filepath)
    os.makedirs(file_dir, exist_ok=True)

    if code is not None:
        with open(filepath, "w", encoding="utf-8") as fout:
            fout.write(code)

    if not use_docker or running_inside_docker:
        # run natively (either docker is disabled or we are already inside a container)
        cmd = [
            sys.executable if lang.startswith("python") else _cmd(lang),
            f".\\{filename}" if WIN32 else filename,
        ]
        try:
            # subprocess.run enforces the timeout itself: the child is killed and
            # TimeoutExpired is raised. (Waiting on a worker thread instead would
            # block until the child exits on its own, defeating the timeout.)
            result = subprocess.run(
                cmd,
                cwd=work_dir,
                capture_output=True,
                text=True,
                timeout=timeout,
            )
        except subprocess.TimeoutExpired:
            return 1, TIMEOUT_MSG, None
        finally:
            # The generated file is removed on every exit path, including errors.
            if original_filename is None:
                os.remove(filepath)
        if result.returncode:
            logs = result.stderr
            # Strip the location of the (temporary) script from the error output.
            if original_filename is None:
                abs_path = str(pathlib.Path(filepath).absolute())
                logs = logs.replace(str(abs_path), "").replace(filename, "")
            else:
                abs_path = str(pathlib.Path(work_dir).absolute()) + PATH_SEPARATOR
                logs = logs.replace(str(abs_path), "")
        else:
            logs = result.stdout
        return result.returncode, logs, None

    # create a docker client
    # (the daemon is only pinged here, i.e. when a container is really needed)
    if use_docker and not is_docker_running():
        raise RuntimeError(
            "Docker package is missing or docker is not running. Please make sure docker is running or set use_docker=False."
        )

    client = docker.from_env()

    image_list = (
        ["python:3-slim", "python:3", "python:3-windowsservercore"]
        if use_docker is True
        else [use_docker]
        if isinstance(use_docker, str)
        else use_docker
    )
    for image in image_list:
        # check if the image exists
        try:
            client.images.get(image)
            break
        except docker.errors.ImageNotFound:
            # pull the image
            print("Pulling image", image)
            try:
                client.images.pull(image)
                break
            except docker.errors.DockerException:
                print("Failed to pull image", image)
    # get a randomized str based on current time to wrap the exit code
    exit_code_str = f"exitcode{time.time()}"
    abs_path = pathlib.Path(work_dir).absolute()
    # The script's own exit status is echoed between two markers so that it can
    # be recovered from the container logs afterwards.
    cmd = [
        "sh",
        "-c",
        f'{_cmd(lang)} "{filename}"; exit_code=$?; echo -n {exit_code_str}; echo -n $exit_code; echo {exit_code_str}',
    ]
    # create a docker container
    container = client.containers.run(
        image,
        command=cmd,
        working_dir="/workspace",
        detach=True,
        # get absolute path to the working directory
        volumes={abs_path: {"bind": "/workspace", "mode": "rw"}},
    )
    start_time = time.time()
    while container.status != "exited" and time.time() - start_time < timeout:
        # Reload the container object
        container.reload()
    if container.status != "exited":
        container.stop()
        container.remove()
        if original_filename is None:
            os.remove(filepath)
        return 1, TIMEOUT_MSG, image
    # get the container logs
    logs = container.logs().decode("utf-8").rstrip()
    # commit the image
    tag = _sanitize_filename_for_docker_tag(filename)
    container.commit(repository="python", tag=tag)
    # remove the container
    container.remove()
    # check if the code executed successfully
    exit_code = container.attrs["State"]["ExitCode"]
    if exit_code == 0:
        # extract the exit code from the logs
        # (the marker contains a "." from the timestamp, so it must be escaped)
        marker = re.escape(exit_code_str)
        pattern = re.compile(f"{marker}(\\d+){marker}")
        match = pattern.search(logs)
        exit_code = 1 if match is None else int(match.group(1))
        # remove the exit code from the logs
        logs = logs if match is None else pattern.sub("", logs)

    if original_filename is None:
        os.remove(filepath)
    if exit_code:
        logs = logs.replace(f"/workspace/{filename if original_filename is None else ''}", "")
    # return the exit code, logs and image
    return exit_code, logs, f"python:{tag}"
|
||||
|
||||
|
||||
# Completion settings for generating assertions from a function's signature and
# docstring. The prompt contains a `{definition}` placeholder; where it is
# filled in is not visible in this module section (presumably by the caller).
_GENERATE_ASSERTIONS_CONFIG = {
    "prompt": """Given the signature and docstring, write the exactly same number of assertion(s) for the provided example(s) in the docstring, without assertion messages.

func signature:
{definition}
assertions:""",
    "model": FAST_MODEL,
    "max_tokens": 256,
    # A blank line ends the generated assertions.
    "stop": "\n\n",
}
|
||||
|
||||
|
||||
def _remove_check(response):
|
||||
"""Remove the check function from the response."""
|
||||
# find the position of the check function
|
||||
pos = response.find("def check(")
|
||||
if pos == -1:
|
||||
return response
|
||||
return response[:pos]
|
||||
|
||||
|
||||
def eval_function_completions(
    responses: list[str],
    definition: str,
    test: Optional[str] = None,
    entry_point: Optional[str] = None,
    assertions: Optional[Union[str, Callable[[str], tuple[str, float]]]] = None,
    timeout: Optional[float] = 3,
    use_docker: Optional[bool] = True,
) -> dict:
    """`(openai<1)` Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test.

    Args:
        responses: The list of responses. When `assertions` is given, the entries
            are replaced in place by their text with any trailing `def check(` part removed.
        definition: The input definition.
        test: The test code.
        entry_point: The name of the function.
        assertions: The assertion code which serves as a filter of the responses, or an assertion generator.
            When provided, only the responses that pass the assertions will be considered for the actual test (if provided).
            A generator is only invoked when there is more than one response.
        timeout: The timeout for executing the code.
        use_docker: Whether to use docker for code execution.

    Returns:
        dict: The success metrics.
          Without `assertions`: "expected_success" and "success".
          With `assertions`: "index_selected", "succeed_assertions", "gen_cost",
          "assertions" and, when `test` is given, "success".
    """
    n = len(responses)
    if assertions is None:
        # no assertion filter
        success_list = []
        for raw_response in responses:
            response = _remove_check(raw_response)
            code = (
                f"{response}\n{test}\ncheck({entry_point})"
                if response.startswith("def")
                else f"{definition}{response}\n{test}\ncheck({entry_point})"
            )
            success = execute_code(code, timeout=timeout, use_docker=use_docker)[0] == 0
            success_list.append(success)
        return {
            "expected_success": 1 - pow(1 - sum(success_list) / n, n),
            "success": any(success_list),
        }

    gen_cost = 0
    if callable(assertions):
        if n > 1:
            # assertion generator
            assertions, gen_cost = assertions(definition)
        else:
            # a single response needs no filtering, so nothing is generated
            assertions = None
    elif not isinstance(assertions, str):
        assertions = None
    # A string is used as the assertion code as is. (It used to be discarded,
    # which made the filter below execute the literal `None`.)

    if n > 1 or test is None:
        # Pick the first response that passes the assertions; when none does,
        # the last response stays selected.
        for i, raw_response in enumerate(responses):
            response = responses[i] = _remove_check(raw_response)
            code = (
                f"{response}\n{assertions}" if response.startswith("def") else f"{definition}{response}\n{assertions}"
            )
            succeed_assertions = execute_code(code, timeout=timeout, use_docker=use_docker)[0] == 0
            if succeed_assertions:
                break
    else:
        # just test, no need to check assertions
        succeed_assertions = False
        i, response = 0, responses[0]
    if test is None:
        # no test code
        return {
            "index_selected": i,
            "succeed_assertions": succeed_assertions,
            "gen_cost": gen_cost,
            "assertions": assertions,
        }
    code_test = (
        f"{response}\n{test}\ncheck({entry_point})"
        if response.startswith("def")
        else f"{definition}{response}\n{test}\ncheck({entry_point})"
    )
    success = execute_code(code_test, timeout=timeout, use_docker=use_docker)[0] == 0
    return {
        "index_selected": i,
        "succeed_assertions": succeed_assertions,
        "success": success,
        "gen_cost": gen_cost,
        "assertions": assertions,
    }
|
||||
|
||||
|
||||
# Prompt template for function completion; `{definition}` is a placeholder for
# the function definition (filled in outside this module section).
_FUNC_COMPLETION_PROMPT = "# Python 3{definition}"
# Stop sequences: generation ends when a new top-level class/def/if/print line starts.
_FUNC_COMPLETION_STOP = ["\nclass", "\ndef", "\nif", "\nprint"]
# Completion configurations for implementing a function, listed from the fast
# model to the default model. NOTE(review): presumably tried in this order by
# the consumer of this list — confirm against the caller.
_IMPLEMENT_CONFIGS = [
    {"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "cache_seed": 0},
    {"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 7, "cache_seed": 0},
    {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "cache_seed": 1},
    {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 2, "cache_seed": 2},
    {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 1, "cache_seed": 2},
]
|
||||
|
||||
|
||||
def create_virtual_env(dir_path: str, **env_args) -> SimpleNamespace:
    """Creates a python virtual environment and returns the context.

    Args:
        dir_path (str): Directory path where the env will be created.
        **env_args: Any extra args to pass to the `EnvBuilder`. When none are
            given, the environment is created with `with_pip=True`.

    Returns:
        SimpleNamespace: the virtual env context object.
    """
    if not env_args:
        env_args = {"with_pip": True}
    env_builder = venv.EnvBuilder(**env_args)
    env_builder.create(dir_path)
    # ensure_directories() is called again only to obtain the context object.
    # With clear=True it would delete the environment that was just built, so
    # clearing is switched off for this second call.
    env_builder.clear = False
    return env_builder.ensure_directories(dir_path)
|
||||
Reference in New Issue
Block a user