sci-gui-agent-benchmark/desktop_env/evaluators/metrics/slides.py

import logging
import xml.etree.ElementTree as ET
import zipfile
from math import sqrt

from pptx import Presentation
from pptx.util import Inches
from pptx.enum.shapes import MSO_SHAPE_TYPE

logger = logging.getLogger("desktopenv.metric.slides")


def check_presenter_console_disable(config_file_path):
    try:
        tree = ET.parse(config_file_path)
        root = tree.getroot()

        namespaces = {
            'oor': 'http://openoffice.org/2001/registry'
        }

        for item in root.findall(
                ".//item[@oor:path='/org.openoffice.Office.Impress/Misc/Start']/prop[@oor:name='EnablePresenterScreen']",
                namespaces):
            # Check if the value of the configuration item indicates that the presenter console has been disabled
            presenter_screen_enabled = item.find('value').text
            if presenter_screen_enabled.lower() == 'false':
                return 1.
            else:
                return 0.
        return 0.
    except Exception as e:
        logger.error(f"Error: {e}")
        return 0.


def check_image_stretch_and_center(modified_ppt, original_ppt):
    # fixme: this func is overfit to this example libreoffice_impress
    # Load the presentations
    original_pres = Presentation(original_ppt)
    modified_pres = Presentation(modified_ppt)

    # Get the first slide of each presentation
    original_slide = original_pres.slides[0]
    modified_slide = modified_pres.slides[0]

    # Get the image on the first slide of each presentation
    original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13]
    modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13]

    the_image = original_slide_images[0]

    the_modified_image = None

    # Get the images that modified in width and height
    for modified_image in modified_slide_images:
        if the_image.image.blob == modified_image.image.blob:
            the_modified_image = modified_image

    if the_modified_image is None:
        return 0.

    if (abs(the_modified_image.width - original_pres.slide_width) > Inches(0.5) or
            abs(the_modified_image.height - original_pres.slide_height) > Inches(0.5) or
            abs(the_modified_image.left - (original_pres.slide_width - the_modified_image.width) / 2) > Inches(0.5) or
            abs(the_modified_image.top - (original_pres.slide_height - the_modified_image.height) / 2) > Inches(0.5)):
        return 0.

    return 1.


def is_red_color(color):
    # judge if the color is red
    return color and color.rgb == (255, 0, 0)


def get_master_placeholder_color(prs):
    # get the color of the placeholder
    masters = prs.slide_masters
    for idx, master in enumerate(masters):
        for placeholder in master.placeholders:
            if placeholder.has_text_frame and placeholder.text == "<number>":
                text_frame = placeholder.text_frame

                if text_frame.paragraphs:
                    first_paragraph = text_frame.paragraphs[0]
                    return first_paragraph.font.color
    return None


def check_slide_numbers_color(pptx_file_path):
    presentation = Presentation(pptx_file_path)

    for i, slide in enumerate(presentation.slides):
        for shape in slide.shapes:
            # check if the shape is a text box
            if hasattr(shape, "text"):
                if shape.text.isdigit():
                    # "SlidePlaceholder" is the name of the placeholder in the master slide
                    page_number_text = shape.text
                    font_color = get_master_placeholder_color(presentation)
                    return 1 if font_color is not None and is_red_color(font_color) else 0


# import numpy as np
# from PIL import Image
# from skimage.metrics import structural_similarity as ssim

# def compare_images(image1_path, image2_path):
#     # You would call this function with the paths to the two images you want to compare:
#     # score = compare_images('path_to_image1', 'path_to_image2')
#     # print("Similarity score:", score)

#     if not image1_path or not image2_path:
#         return 0

#     # Open the images and convert to grayscale
#     image1 = Image.open(image1_path).convert('L')
#     image2 = Image.open(image2_path).convert('L')

#     # Resize images to the smaller one's size for comparison
#     image1_size = image1.size
#     image2_size = image2.size
#     new_size = min(image1_size, image2_size)

#     image1 = image1.resize(new_size, Image.Resampling.LANCZOS)
#     image2 = image2.resize(new_size, Image.Resampling.LANCZOS)

#     # Convert images to numpy arrays
#     image1_array = np.array(image1)
#     image2_array = np.array(image2)

#     # Calculate SSIM between two images
#     similarity_index = ssim(image1_array, image2_array)

#     return similarity_index

def compare_pptx_files(file1_path, file2_path, **options):
    # todo: not strictly match since not all information is compared because we cannot get the info through pptx
    prs1 = Presentation(file1_path)
    prs2 = Presentation(file2_path)

    approximately_tolerance = options.get("approximately_tolerance", 0.005)
    def is_approximately_equal(val1, val2, tolerance=approximately_tolerance):
        """Compare two values with a tolerance of 0.1% (0.005)"""
        if val1 == val2:
            return True
        if val1 == 0 and val2 == 0:
            return True
        if val1 == 0 or val2 == 0:
            return False
        return abs(val1 - val2) / max(abs(val1), abs(val2)) <= tolerance

    examine_number_of_slides = options.get("examine_number_of_slides", True)
    examine_shape = options.get("examine_shape", True)
    examine_text = options.get("examine_text", True)
    examine_indent = options.get("examine_indent", True)
    examine_font_name = options.get("examine_font_name", True)
    examine_font_size = options.get("examine_font_size", True)
    examine_font_bold = options.get("examine_font_bold", True)
    examine_font_italic = options.get("examine_font_italic", True)
    examine_color_rgb = options.get("examine_color_rgb", True)
    examine_font_underline = options.get("examine_font_underline", True)
    examine_strike_through = options.get("examine_strike_through", True)
    examine_alignment = options.get("examine_alignment", True)
    examine_title_bottom_position = options.get("examine_title_bottom_position", False)
    examine_table_bottom_position = options.get("examine_table_bottom_position", False)
    examine_right_position = options.get("examine_right_position", False)
    examine_top_position = options.get("examine_top_position", False)
    examine_shape_for_shift_size = options.get("examine_shape_for_shift_size", False)
    examine_image_size = options.get("examine_image_size", False)
    examine_modify_height = options.get("examine_modify_height", False)
    examine_bullets = options.get("examine_bullets", True)
    examine_background_color = options.get("examine_background_color", True)
    examine_note = options.get("examine_note", True)

    # compare the number of slides
    if len(prs1.slides) != len(prs2.slides) and examine_number_of_slides:
        return 0

    slide_idx = 0
    # compare the content of each slide
    for slide1, slide2 in zip(prs1.slides, prs2.slides):
        slide_idx += 1

        def get_slide_background_color(slide):
            # background = slide.background
            # if background.fill.background():
            #     return background.fill.fore_color.rgb
            # else:
            #     return None
            fill = slide.background.fill
            if fill.type == 1:
                return fill.fore_color.rgb
            elif fill.type == 5:
                master_fill = slide.slide_layout.slide_master.background.fill
                if master_fill.type == 1:
                    return master_fill.fore_color.rgb
                else:
                    return None
            else:
                return None

        if get_slide_background_color(slide1) != get_slide_background_color(slide2) and examine_background_color:
            return 0

        def get_slide_notes(slide):
            notes_slide = slide.notes_slide
            if notes_slide:
                return notes_slide.notes_text_frame.text
            else:
                return None

        if get_slide_notes(slide1).strip() != get_slide_notes(slide2).strip() and examine_note:
            return 0

        # check if the number of slides is the same
        if len(slide1.shapes) != len(slide2.shapes):
            return 0

        # check if the shapes are the same
        for shape1, shape2 in zip(slide1.shapes, slide2.shapes):
            if examine_title_bottom_position:
                if hasattr(shape1, "text") and hasattr(shape2, "text") and shape1.text == shape2.text:
                    if shape1.text == "Product Comparison" and (shape1.top <= shape2.top or shape1.top < 3600000):
                        return 0
                elif (not is_approximately_equal(shape1.left, shape2.left) or
                      not is_approximately_equal(shape1.top, shape2.top) or
                      not is_approximately_equal(shape1.width, shape2.width) or
                      not is_approximately_equal(shape1.height, shape2.height)):
                    return 0

            if examine_table_bottom_position:
                if slide_idx == 3 and shape1.shape_type == 19 and shape2.shape_type == 19:
                    if shape1.top <= shape2.top or shape1.top < 3600000:
                        return 0
                elif (not is_approximately_equal(shape1.left, shape2.left) or
                      not is_approximately_equal(shape1.top, shape2.top) or
                      not is_approximately_equal(shape1.width, shape2.width) or
                      not is_approximately_equal(shape1.height, shape2.height)):
                    return 0

            if examine_right_position:
                if slide_idx == 2 and not hasattr(shape1, "text") and not hasattr(shape2, "text"):
                    if shape1.left <= shape2.left or shape1.left < 4320000:
                        return 0

            if examine_top_position:
                if slide_idx == 2 and shape1.shape_type == 13 and shape2.shape_type == 13:
                    if shape1.top >= shape2.top or shape1.top > 1980000:
                        return 0
                elif (not is_approximately_equal(shape1.left, shape2.left) or
                      not is_approximately_equal(shape1.top, shape2.top) or
                      not is_approximately_equal(shape1.width, shape2.width) or
                      not is_approximately_equal(shape1.height, shape2.height)):
                    return 0

            if examine_shape_for_shift_size:
                if (not is_approximately_equal(shape1.left, shape2.left) or
                    not is_approximately_equal(shape1.top, shape2.top) or
                    not is_approximately_equal(shape1.width, shape2.width) or
                    not is_approximately_equal(shape1.height, shape2.height)):
                    if not (hasattr(shape1, "text") and hasattr(shape2,
                                                                "text") and shape1.text == shape2.text and shape1.text == "Elaborate on what you want to discuss."):
                        return 0

            if (
                    not is_approximately_equal(shape1.left, shape2.left) or
                    not is_approximately_equal(shape1.top, shape2.top) or
                    not is_approximately_equal(shape1.width, shape2.width) or
                    not is_approximately_equal(shape1.height, shape2.height)) and examine_shape:
                return 0

            if examine_image_size:
                if shape1.shape_type == 13 and shape2.shape_type == 13:
                    if not is_approximately_equal(shape1.width, shape2.width) or not is_approximately_equal(shape1.height, shape2.height):
                        return 0
                elif (not is_approximately_equal(shape1.left, shape2.left) or
                      not is_approximately_equal(shape1.top, shape2.top) or
                      not is_approximately_equal(shape1.width, shape2.width) or
                      not is_approximately_equal(shape1.height, shape2.height)):
                    return 0

            if examine_modify_height:
                if not hasattr(shape1, "text") and not hasattr(shape2,
                                                               "text") or shape1.shape_type == 5 and shape2.shape_type == 5:
                    if not is_approximately_equal(shape1.height, shape2.height):
                        return 0
                elif (not is_approximately_equal(shape1.left, shape2.left) or
                      not is_approximately_equal(shape1.top, shape2.top) or
                      not is_approximately_equal(shape1.width, shape2.width) or
                      not is_approximately_equal(shape1.height, shape2.height)):
                    return 0

            if shape1.shape_type == MSO_SHAPE_TYPE.TABLE:
                table1 = shape1.table
                table2 = shape2.table
                for row_idx in range(len(table1.rows)):
                    for col_idx in range(len(table1.columns)):
                        cell1 = table1.cell(row_idx, col_idx)
                        cell2 = table2.cell(row_idx, col_idx)

                        for para1, para2 in zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs):
                            for run1, run2 in zip(para1.runs, para2.runs):
                                if run1.font.color.rgb != run2.font.color.rgb:
                                    return 0

            if hasattr(shape1, "text") and hasattr(shape2, "text"):
                if shape1.text.strip() != shape2.text.strip() and examine_text:
                    return 0

                    # check if the paragraphs are the same
                for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
                    if para1.alignment != para2.alignment and examine_alignment:
                        return 0

                    # check if the runs are the same
                    if para1.text != para2.text and examine_text:
                        return 0

                    if para1.level != para2.level and examine_indent:
                        return 0

                    for run1, run2 in zip(para1.runs, para2.runs):

                        # check if the font properties are the same
                        if run1.font.name != run2.font.name and examine_font_name:
                            return 0

                        if run1.font.size != run2.font.size and examine_font_size:
                            return 0

                        if run1.font.bold != run2.font.bold and examine_font_bold:
                            return 0

                        if run1.font.italic != run2.font.italic and examine_font_italic:
                            if run1.font.italic is not None and run2.font.italic is not None:
                                return 0

                        if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"):
                            if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb:
                                return 0

                        if run1.font.underline != run2.font.underline and examine_font_underline:
                            if run1.font.underline is not None and run2.font.underline is not None:
                                return 0
                            if (run1.font.underline is None and run2.font.underline is True) or (run1.font.underline is True and run2.font.underline is None):
                                return 0

                        if run1.font._element.attrib.get('strike', 'noStrike') != run2.font._element.attrib.get(
                                'strike', 'noStrike') and examine_strike_through:
                            return 0

                        def _extract_bullets(xml_data):
                            root = ET.fromstring(xml_data)

                            namespaces = {
                                'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
                                'p': 'http://schemas.openxmlformats.org/presentationml/2006/main',
                            }

                            bullets = []

                            for paragraph in root.findall('.//a:p', namespaces):
                                pPr = paragraph.find('a:pPr', namespaces)
                                if pPr is not None:
                                    lvl = pPr.get('lvl')
                                    buChar = pPr.find('a:buChar', namespaces)
                                    char = buChar.get('char') if buChar is not None else "No Bullet"
                                    buClr = pPr.find('a:buClr/a:srgbClr', namespaces)
                                    color = buClr.get('val') if buClr is not None else "No Color"
                                else:
                                    lvl = "No Level"
                                    char = "No Bullet"
                                    color = "No Color"

                                text = "".join(t.text for t in paragraph.findall('.//a:t', namespaces))

                                # Only add non-empty paragraphs to bullets list
                                if text.strip():
                                    bullets.append((lvl, char, text, color))

                            return bullets

                        if examine_bullets:
                            bullets1 = _extract_bullets(run1.part.blob.decode('utf-8'))
                            bullets2 = _extract_bullets(run2.part.blob.decode('utf-8'))

                            # Compare only non-empty bullets
                            if bullets1 != bullets2:
                                return 0

                    # fixme: Actually there are more properties to be compared, we can add them later via parsing the xml data

    return 1


def check_strikethrough(pptx_path, rules):
    # Load the presentation
    presentation = Presentation(pptx_path)

    slide_index_s = rules["slide_index_s"]
    shape_index_s = rules["shape_index_s"]
    paragraph_index_s = rules["paragraph_index_s"]

    try:
        for slide_index in slide_index_s:
            # Get the slide
            slide = presentation.slides[slide_index]

            for shape_index in shape_index_s:
                # Get the text box
                paragraphs = slide.shapes[shape_index].text_frame.paragraphs

                for paragraph_index in paragraph_index_s:
                    paragraph = paragraphs[paragraph_index]
                    run = paragraph.runs[0]
                    if 'strike' not in run.font._element.attrib:
                        return 0


    except Exception as e:
        logger.error(f"Error: {e}")
        return 0

    return 1


def check_slide_orientation_Portrait(pptx_path):
    presentation = Presentation(pptx_path)

    slide_height = presentation.slide_height
    slide_width = presentation.slide_width

    if slide_width < slide_height:
        return 1
    return 0


def evaluate_presentation_fill_to_rgb_distance(pptx_file, rules):
    rgb = rules["rgb"]

    try:
        original_rgb = rules["original_rgb"]
    except:
        original_rgb = None

    def get_rgb_from_color(color):
        try:
            if hasattr(color, "rgb"):
                return color.rgb
            else:
                return None
        except:
            return None

    def slide_fill_distance_to_rgb(_slide, _rgb, _original_rgb):
        fill = _slide.background.fill
        if fill.type == 1:
            color_rgb = get_rgb_from_color(fill.fore_color)
            if color_rgb is None:
                return 1
            r1, g1, b1 = color_rgb
            r2, g2, b2 = _rgb

            if _original_rgb is not None:
                r3, g3, b3 = _original_rgb
                if r1 == r3 and g1 == g3 and b1 == b3:
                    return 1

            return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 ** 2 + 255 ** 2 + 255 ** 2)
        elif fill.type == 5:
            master_fill = _slide.slide_layout.slide_master.background.fill
            if master_fill.type == 1:
                color_rgb = get_rgb_from_color(master_fill.fore_color)
                if color_rgb is None:
                    return 1
                r1, g1, b1 = color_rgb
            else:
                return 1
            r2, g2, b2 = _rgb

            if _original_rgb is not None:
                r3, g3, b3 = _original_rgb
                if r1 == r3 and g1 == g3 and b1 == b3:
                    return 1

            return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 ** 2 + 255 ** 2 + 255 ** 2)

        return 1

    prs = Presentation(pptx_file)
    similarity = 1 - sum(slide_fill_distance_to_rgb(slide, rgb, original_rgb) for slide in prs.slides) / len(prs.slides)
    return similarity


def check_left_panel(accessibility_tree):
    namespaces = {
        'st': 'uri:deskat:state.at-spi.gnome.org',
        'cp': 'uri:deskat:component.at-spi.gnome.org'
    }

    root = ET.fromstring(accessibility_tree)

    # 遍历所有 document-frame 节点
    for doc_frame in root.iter('document-frame'):
        if doc_frame.attrib.get("name") == "Slides View":
            # 说明 Slides View 存在，即左侧面板已打开
            return 1.

    # 没找到 Slides View，认为左侧面板未打开
    return 0.


def check_transition(pptx_file, rules):
    slide_idx = rules['slide_idx']
    transition_type = rules['transition_type']

    # Use the zipfile module to open the .pptx file
    with zipfile.ZipFile(pptx_file, 'r') as zip_ref:
        # Get the slide XML file
        slide_name = 'ppt/slides/slide{}.xml'.format(slide_idx + 1)
        try:
            zip_ref.getinfo(slide_name)
        except KeyError:
            # Slide does not exist
            return 0.

        with zip_ref.open(slide_name) as slide_file:
            # 解析XML
            tree = ET.parse(slide_file)
            root = tree.getroot()

            # XML namespace
            namespaces = {
                'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
                'p': 'http://schemas.openxmlformats.org/presentationml/2006/main',
            }

            # Search for the transition element
            transition = root.find('.//p:transition', namespaces)
            if transition is not None:
                # Check if the transition is an expected transition
                dissolve = transition.find('.//p:{}'.format(transition_type), namespaces)
                if dissolve is not None:
                    return 1.
                else:
                    return 0.
            else:
                return 0.


def check_page_number_colors(pptx_file, rules):
    color = rules["color"]

    def is_red(rgb_str, threshold=50):
        r, g, b = int(rgb_str[1:3], 16), int(rgb_str[3:5], 16), int(rgb_str[5:7], 16)
        return r > g + threshold and r > b + threshold

    def is_blue(rgb_str, threshold=50):
        r, g, b = int(rgb_str[1:3], 16), int(rgb_str[3:5], 16), int(rgb_str[5:7], 16)
        return b > g + threshold and b > r + threshold

    def is_green(rgb_str, threshold=50):
        r, g, b = int(rgb_str[1:3], 16), int(rgb_str[3:5], 16), int(rgb_str[5:7], 16)
        return g > r + threshold and g > b + threshold

    def is_black(rgb_str, threshold=50):
        r, g, b = int(rgb_str[1:3], 16), int(rgb_str[3:5], 16), int(rgb_str[5:7], 16)
        return r < threshold and g < threshold and b < threshold

    with zipfile.ZipFile(pptx_file, 'r') as zip_ref:
        slide_master_name = 'ppt/slideMasters/slideMaster1.xml'
        with zip_ref.open(slide_master_name) as slide_master_file:
            tree = ET.parse(slide_master_file)
            root = tree.getroot()

            namespaces = {
                'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
                'p': 'http://schemas.openxmlformats.org/presentationml/2006/main',
            }

            color_elems = root.findall('.//a:solidFill//a:srgbClr', namespaces)
            slides_color_val = color_elems[-2].get('val')

    if slides_color_val is None:
        return 0
    elif color == "red" and not is_red(slides_color_val):
        return 0
    elif color == "blue" and not is_blue(slides_color_val):
        return 0
    elif color == "green" and not is_green(slides_color_val):
        return 0
    elif color == "black" and not is_black(slides_color_val):
        return 0

    return 1


def check_auto_saving_time(pptx_file, rules):
    minutes = rules["minutes"]

    # open and parse xml file
    try:
        tree = ET.parse(pptx_file)
        root = tree.getroot()

        # Traverse the XML tree to find the autosave time setting
        autosave_time = None
        for item in root.findall(".//item"):
            # Check the path attribute
            path = item.get('{http://openoffice.org/2001/registry}path')
            if path == "/org.openoffice.Office.Common/Save/Document":
                # Once the correct item is found, look for the prop element with the name "AutoSaveTimeIntervall"
                for prop in item.findall(".//prop"):
                    name = prop.get('{http://openoffice.org/2001/registry}name')
                    if name == "AutoSaveTimeIntervall":
                        # Extract the value of the autosave time interval
                        autosave_time = prop.find(".//value").text
                        break

        if autosave_time is None:
            return 0
        else:
            autosave_time = int(autosave_time)
            if autosave_time == minutes:
                return 1
            else:
                return 0

    except ET.ParseError as e:
        logger.error(f"Error parsing XML: {e}")
    except FileNotFoundError:
        logger.error(f"File not found: {pptx_file}")