Files
sci-gui-agent-benchmark/mm_agents/coact/autogen/coding/markdown_code_extractor.py
2025-07-31 10:35:20 +08:00

46 lines
1.6 KiB
Python

# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
#
# SPDX-License-Identifier: Apache-2.0
#
# Portions derived from https://github.com/microsoft/autogen are under the MIT License.
# SPDX-License-Identifier: MIT
import re
from typing import Union
from ..code_utils import CODE_BLOCK_PATTERN, UNKNOWN, content_str, infer_lang
from ..doc_utils import export_module
from ..types import UserMessageImageContentPart, UserMessageTextContentPart
from .base import CodeBlock, CodeExtractor
# Names exported by a star-import of this module: only the extractor class is public.
__all__ = ("MarkdownCodeExtractor",)
@export_module("autogen.coding")
class MarkdownCodeExtractor(CodeExtractor):
    """(Experimental) Code extractor that finds code blocks in a message using Markdown syntax."""

    def extract_code_blocks(
        self, message: Union[str, list[Union[UserMessageTextContentPart, UserMessageImageContentPart]], None]
    ) -> list[CodeBlock]:
        """(Experimental) Collect every code block found in ``message``.

        Args:
            message: The message to scan. It is passed through ``content_str`` to
                obtain the text that is searched; per the signature it may be a
                plain string, a list of text/image content parts, or ``None``.

        Returns:
            list[CodeBlock]: One ``CodeBlock`` per match of ``CODE_BLOCK_PATTERN``,
            in the order ``re.findall`` reports them; an empty list when nothing
            matches.
        """
        found = re.findall(CODE_BLOCK_PATTERN, content_str(message), flags=re.DOTALL)

        extracted = []
        for tag, body in found:
            # A block without a language tag gets its language inferred from the code.
            language = tag if tag != "" else infer_lang(body)
            # The UNKNOWN sentinel is recorded as an empty language string.
            extracted.append(CodeBlock(code=body, language="" if language == UNKNOWN else language))
        return extracted