From 29caebb765f9dcc0a2581a4949631b1e38fa168d Mon Sep 17 00:00:00 2001 From: Shenzhennan <108182492+5456es@users.noreply.github.com> Date: Thu, 10 Jul 2025 00:36:32 +0800 Subject: [PATCH] Impress check and fix (all font compare issue) (#247) * Enhance PPTX comparison logic in slides.py - Improved alignment comparison to treat None and LEFT as equivalent. - Added special handling for font bold and italic properties to consider None and False as equivalent. - Introduced a new bullet comparison function that allows for minor differences and tolerates formatting variations. - Updated JSON examples to support multiple file comparisons and results. * fix all fonts json file f23ac * fix clean the shape examination in irrelevant part-top position check * Refactor JSON structure for PPTX comparison - Updated the instruction formatting for clarity. - Modified the comparison logic to support multiple expected and result files, enhancing flexibility in evaluations. - Changed the function key to an array to accommodate multiple comparison functions. - Introduced a conjunction key to specify logical relationships between comparisons. * fix impress-e4ef0baf by adding all fonts gold file * update impress bf4e9888 task ins * fix impress b8adbc24 font size * Enhance PPTX comparison functionality in slides.py - Introduced a debug logger for detailed output during PPTX comparisons. - Added a new function to recursively retrieve all text shapes, including those within groups. - Enabled debug logging to provide insights on slide and shape comparisons. - Updated JSON examples to support multiple expected and result files for enhanced evaluation flexibility. * Enable debug logging by default in PPTX comparison and enhance debug output for shape mismatches. Updated JSON examples to support multiple expected and result files for improved evaluation consistency. 
* fix impress all fonts compare file * Refactor PPTX comparison logic and JSON examples for height modification tasks - Added critical notes in slides.py to clarify the execution order of shape examination and height modification checks. - Updated JSON examples to support multiple expected and result files, enhancing evaluation consistency. - Ensured that examine_shape must be set to False for examine_modify_height to function correctly, preventing premature termination of comparisons. * Enhance debug logging in PPTX comparison for detailed font attribute mismatches - Added debug logging for differences in font color, bold, italic, and underline attributes during table cell comparisons. - Improved clarity of debug output by including specific slide, shape, and cell indices for mismatches. - Ensured that existing comparison logic remains intact while enhancing debugging capabilities. * Enhance debug logging for font attribute mismatches in PPTX comparison - Added detailed debug logging for font name and size mismatches during PPTX comparisons, including specific slide, shape, and paragraph indices. - Updated JSON examples to support multiple expected and result files, improving evaluation consistency. - Maintained existing comparison logic while enhancing debugging capabilities. 
* fix impress 3161de json file --------- Co-authored-by: yuanmengqi --- desktop_env/evaluators/metrics/slides.py | 310 ++++++++++++++++-- .../04578141-1d42-4146-b9cf-6fab4ce5fd74.json | 43 ++- .../05dd4c1d-c489-4c85-8389-a7836c4f0567.json | 35 +- .../08aced46-45a2-48d7-993b-ed3fb5b32302.json | 41 ++- .../3161d64e-3120-47b4-aaad-6a764a92493b.json | 3 +- .../4ed5abd0-8b5d-47bd-839f-cacfa15ca37a.json | 43 ++- .../550ce7e7-747b-495f-b122-acdc4d0b8e54.json | 40 ++- .../57667013-ea97-417c-9dce-2713091e6e2a.json | 41 ++- .../5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1.json | 41 ++- .../5cfb9197-e72b-454b-900e-c06b0c802b40.json | 35 +- .../7ae48c60-f143-4119-b659-15b8f485eb9a.json | 59 +++- .../9ec204e4-f0a3-42f8-8458-b772a6797cab.json | 42 ++- .../a53f80cd-4a90-4490-8310-097b011433f6.json | 41 ++- .../b8adbc24-cef2-4b15-99d5-ecbe7ff445eb.json | 4 +- .../bf4e9888-f10f-47af-8dba-76413038b73c.json | 2 +- .../e4ef0baf-4b52-4590-a47e-d4d464cca2d7.json | 41 ++- .../f23acfd2-c485-4b7c-a1e7-d4303ddfe864.json | 57 +++- 17 files changed, 702 insertions(+), 176 deletions(-) diff --git a/desktop_env/evaluators/metrics/slides.py b/desktop_env/evaluators/metrics/slides.py index 905e19d..bee4f1a 100644 --- a/desktop_env/evaluators/metrics/slides.py +++ b/desktop_env/evaluators/metrics/slides.py @@ -9,6 +9,31 @@ from pptx.enum.shapes import MSO_SHAPE_TYPE logger = logging.getLogger("desktopenv.metric.slides") +# Add a new logger specifically for debugging PPTX comparisons +debug_logger = logging.getLogger("desktopenv.metric.slides.debug") + +def enable_debug_logging(): + """Enable debug logging for PPTX comparison""" + debug_logger.setLevel(logging.DEBUG) + if not debug_logger.handlers: + handler = logging.StreamHandler() + handler.setLevel(logging.DEBUG) + formatter = logging.Formatter('[PPTX_DEBUG] %(message)s') + handler.setFormatter(formatter) + debug_logger.addHandler(handler) + +# Add debug logger for detailed comparison output +debug_logger = 
logging.getLogger("desktopenv.metric.slides.debug") + +def enable_debug_logging(): + """Enable detailed debug logging for PPTX comparison""" + debug_logger.setLevel(logging.DEBUG) + if not debug_logger.handlers: + handler = logging.StreamHandler() + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + debug_logger.addHandler(handler) + def check_presenter_console_disable(config_file_path): try: @@ -135,11 +160,45 @@ def check_slide_numbers_color(pptx_file_path): # return similarity_index +def get_all_text_shapes(slide): + """递归获取slide中所有包含文本的shapes,包括GROUP内部的""" + + def extract_text_shapes(shape): + results = [] + + # 检查当前shape是否有文本 + if hasattr(shape, "text") and hasattr(shape, "text_frame"): + results.append(shape) + + # 如果是GROUP,递归检查内部shapes + if hasattr(shape, 'shapes'): + for sub_shape in shape.shapes: + results.extend(extract_text_shapes(sub_shape)) + + return results + + all_text_shapes = [] + for shape in slide.shapes: + all_text_shapes.extend(extract_text_shapes(shape)) + + return all_text_shapes + + def compare_pptx_files(file1_path, file2_path, **options): # todo: not strictly match since not all information is compared because we cannot get the info through pptx prs1 = Presentation(file1_path) prs2 = Presentation(file2_path) + # Enable debug logging if requested + enable_debug = options.get("enable_debug", True) + if enable_debug: + enable_debug_logging() + debug_logger.debug(f"=== COMPARING PPTX FILES ===") + debug_logger.debug(f"File 1: {file1_path}") + debug_logger.debug(f"File 2: {file2_path}") + debug_logger.debug(f"File 1 slides: {len(prs1.slides)}") + debug_logger.debug(f"File 2 slides: {len(prs2.slides)}") + approximately_tolerance = options.get("approximately_tolerance", 0.005) def is_approximately_equal(val1, val2, tolerance=approximately_tolerance): """Compare two values with a tolerance of 0.1% (0.005)""" @@ -176,12 +235,17 @@ def compare_pptx_files(file1_path, file2_path, 
**options): # compare the number of slides if len(prs1.slides) != len(prs2.slides) and examine_number_of_slides: + if enable_debug: + debug_logger.debug(f"MISMATCH: Number of slides differ - File1: {len(prs1.slides)}, File2: {len(prs2.slides)}") return 0 slide_idx = 0 # compare the content of each slide for slide1, slide2 in zip(prs1.slides, prs2.slides): slide_idx += 1 + if enable_debug: + debug_logger.debug(f"--- Comparing Slide {slide_idx} ---") + debug_logger.debug(f"Slide {slide_idx} - Shapes count: File1={len(slide1.shapes)}, File2={len(slide2.shapes)}") def get_slide_background_color(slide): # background = slide.background @@ -212,14 +276,35 @@ def compare_pptx_files(file1_path, file2_path, **options): return None if get_slide_notes(slide1).strip() != get_slide_notes(slide2).strip() and examine_note: + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx} - Notes differ:") + debug_logger.debug(f" Notes1: '{get_slide_notes(slide1).strip()}'") + debug_logger.debug(f" Notes2: '{get_slide_notes(slide2).strip()}'") return 0 + # Get all text shapes including those inside GROUPs + text_shapes1 = get_all_text_shapes(slide1) + text_shapes2 = get_all_text_shapes(slide2) + + if enable_debug: + debug_logger.debug(f"Slide {slide_idx} - Text shapes found: File1={len(text_shapes1)}, File2={len(text_shapes2)}") + # check if the number of slides is the same if len(slide1.shapes) != len(slide2.shapes): + if enable_debug: + debug_logger.debug(f"MISMATCH: Slide {slide_idx} - Different number of shapes: File1={len(slide1.shapes)}, File2={len(slide2.shapes)}") return 0 # check if the shapes are the same + shape_idx = 0 for shape1, shape2 in zip(slide1.shapes, slide2.shapes): + shape_idx += 1 + if enable_debug: + debug_logger.debug(f" Shape {shape_idx} - Type: {shape1.shape_type} vs {shape2.shape_type}") + if hasattr(shape1, "text") and hasattr(shape2, "text"): + debug_logger.debug(f" Shape {shape_idx} - Text: '{shape1.text.strip()}' vs '{shape2.text.strip()}'") + 
debug_logger.debug(f" Shape {shape_idx} - Position: ({shape1.left}, {shape1.top}) vs ({shape2.left}, {shape2.top})") + debug_logger.debug(f" Shape {shape_idx} - Size: ({shape1.width}, {shape1.height}) vs ({shape2.width}, {shape2.height})") if examine_title_bottom_position: if hasattr(shape1, "text") and hasattr(shape2, "text") and shape1.text == shape2.text: if shape1.text == "Product Comparison" and (shape1.top <= shape2.top or shape1.top < 3600000): @@ -249,11 +334,7 @@ def compare_pptx_files(file1_path, file2_path, **options): if slide_idx == 2 and shape1.shape_type == 13 and shape2.shape_type == 13: if shape1.top >= shape2.top or shape1.top > 1980000: return 0 - elif (not is_approximately_equal(shape1.left, shape2.left) or - not is_approximately_equal(shape1.top, shape2.top) or - not is_approximately_equal(shape1.width, shape2.width) or - not is_approximately_equal(shape1.height, shape2.height)): - return 0 + if examine_shape_for_shift_size: if (not is_approximately_equal(shape1.left, shape2.left) or @@ -264,11 +345,23 @@ def compare_pptx_files(file1_path, file2_path, **options): "text") and shape1.text == shape2.text and shape1.text == "Elaborate on what you want to discuss."): return 0 + # CRITICAL: examine_shape check happens BEFORE examine_modify_height! + # If examine_shape=True (default), any shape dimension mismatch will cause immediate return 0, + # preventing examine_modify_height from ever being executed. + # For height modification tasks, you MUST set examine_shape=False to allow examine_modify_height to work. 
if ( not is_approximately_equal(shape1.left, shape2.left) or not is_approximately_equal(shape1.top, shape2.top) or not is_approximately_equal(shape1.width, shape2.width) or not is_approximately_equal(shape1.height, shape2.height)) and examine_shape: + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} - Shape dimensions differ:") + debug_logger.debug(f" Left: {shape1.left} vs {shape2.left} (equal: {is_approximately_equal(shape1.left, shape2.left)})") + debug_logger.debug(f" Top: {shape1.top} vs {shape2.top} (equal: {is_approximately_equal(shape1.top, shape2.top)})") + debug_logger.debug(f" Width: {shape1.width} vs {shape2.width} (equal: {is_approximately_equal(shape1.width, shape2.width)})") + debug_logger.debug(f" Height: {shape1.height} vs {shape2.height} (equal: {is_approximately_equal(shape1.height, shape2.height)})") + if hasattr(shape1, "text") and hasattr(shape2, "text"): + debug_logger.debug(f" Shape text: '{shape1.text.strip()}' vs '{shape2.text.strip()}'") return 0 if examine_image_size: @@ -281,6 +374,11 @@ def compare_pptx_files(file1_path, file2_path, **options): not is_approximately_equal(shape1.height, shape2.height)): return 0 + # examine_modify_height: Special logic for height modification tasks + # - For non-text shapes and FREEFORM shapes (type 5): Only check height differences + # - For other shapes: Check all dimensions (left, top, width, height) + # WARNING: This check only works if examine_shape=False, otherwise examine_shape will + # terminate the comparison before this code is reached! 
if examine_modify_height: if not hasattr(shape1, "text") and not hasattr(shape2, "text") or shape1.shape_type == 5 and shape2.shape_type == 5: @@ -295,24 +393,92 @@ def compare_pptx_files(file1_path, file2_path, **options): if shape1.shape_type == MSO_SHAPE_TYPE.TABLE: table1 = shape1.table table2 = shape2.table + if enable_debug: + debug_logger.debug(f" Shape {shape_idx} - Comparing TABLE with {len(table1.rows)} rows and {len(table1.columns)} columns") for row_idx in range(len(table1.rows)): for col_idx in range(len(table1.columns)): cell1 = table1.cell(row_idx, col_idx) cell2 = table2.cell(row_idx, col_idx) - for para1, para2 in zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs): - for run1, run2 in zip(para1.runs, para2.runs): - if run1.font.color.rgb != run2.font.color.rgb: - return 0 + for para_idx, (para1, para2) in enumerate(zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs)): + for run_idx, (run1, run2) in enumerate(zip(para1.runs, para2.runs)): + # Check font color + if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"): + if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb: + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx}, Run {run_idx} - Font color differs:") + debug_logger.debug(f" Color1: {run1.font.color.rgb} vs Color2: {run2.font.color.rgb}") + debug_logger.debug(f" Cell text: '{cell1.text.strip()}' vs '{cell2.text.strip()}'") + debug_logger.debug(f" Run text: '{run1.text}' vs '{run2.text}'") + return 0 + + # Check font bold + if run1.font.bold != run2.font.bold: + if not ((run1.font.bold is None or run1.font.bold is False) and + (run2.font.bold is None or run2.font.bold is False)): + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx}, Run {run_idx} - Font bold differs:") + debug_logger.debug(f" Bold1: 
{run1.font.bold} vs Bold2: {run2.font.bold}") + debug_logger.debug(f" Run text: '{run1.text}' vs '{run2.text}'") + return 0 + + # Check font italic + if run1.font.italic != run2.font.italic: + if not ((run1.font.italic is None or run1.font.italic is False) and + (run2.font.italic is None or run2.font.italic is False)): + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx}, Run {run_idx} - Font italic differs:") + debug_logger.debug(f" Italic1: {run1.font.italic} vs Italic2: {run2.font.italic}") + debug_logger.debug(f" Run text: '{run1.text}' vs '{run2.text}'") + return 0 + + # Check font underline + if run1.font.underline != run2.font.underline: + if run1.font.underline is not None and run2.font.underline is not None: + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx}, Run {run_idx} - Font underline differs:") + debug_logger.debug(f" Underline1: {run1.font.underline} vs Underline2: {run2.font.underline}") + debug_logger.debug(f" Run text: '{run1.text}' vs '{run2.text}'") + return 0 + if (run1.font.underline is None and run2.font.underline is True) or (run1.font.underline is True and run2.font.underline is None): + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx}, Run {run_idx} - Font underline differs (None vs True):") + debug_logger.debug(f" Underline1: {run1.font.underline} vs Underline2: {run2.font.underline}") + debug_logger.debug(f" Run text: '{run1.text}' vs '{run2.text}'") + return 0 if hasattr(shape1, "text") and hasattr(shape2, "text"): if shape1.text.strip() != shape2.text.strip() and examine_text: return 0 # check if the paragraphs are the same + para_idx = 0 for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs): - if para1.alignment != para2.alignment 
and examine_alignment: - return 0 + para_idx += 1 + # Handle alignment comparison - treat None and LEFT (1) as equivalent + if examine_alignment: + from pptx.enum.text import PP_ALIGN + align1 = para1.alignment + align2 = para2.alignment + + if enable_debug: + align1_name = "None" if align1 is None else getattr(align1, 'name', str(align1)) + align2_name = "None" if align2 is None else getattr(align2, 'name', str(align2)) + debug_logger.debug(f" Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Alignment: '{align1_name}' vs '{align2_name}'") + debug_logger.debug(f" Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Text: '{para1.text}' vs '{para2.text}'") + + # Convert None to LEFT for comparison since None means default left alignment + if align1 is None: + align1 = PP_ALIGN.LEFT # LEFT alignment + if align2 is None: + align2 = PP_ALIGN.LEFT # LEFT alignment + + if align1 != align2: + if enable_debug: + align1_final = getattr(align1, 'name', str(align1)) + align2_final = getattr(align2, 'name', str(align2)) + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Alignment differs: '{align1_final}' vs '{align2_final}'") + return 0 # check if the runs are the same if para1.text != para2.text and examine_text: @@ -325,26 +491,59 @@ def compare_pptx_files(file1_path, file2_path, **options): # check if the font properties are the same if run1.font.name != run2.font.name and examine_font_name: + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Font name differs:") + debug_logger.debug(f" Name1: '{run1.font.name}' vs Name2: '{run2.font.name}'") + debug_logger.debug(f" Text: '{run1.text}' vs '{run2.text}'") return 0 if run1.font.size != run2.font.size and examine_font_size: + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Font size differs:") + debug_logger.debug(f" Size1: {run1.font.size} vs Size2: 
{run2.font.size}") + debug_logger.debug(f" Text: '{run1.text}' vs '{run2.text}'") return 0 if run1.font.bold != run2.font.bold and examine_font_bold: - return 0 + # Special handling for None vs False - both mean "not bold" + if not ((run1.font.bold is None or run1.font.bold is False) and + (run2.font.bold is None or run2.font.bold is False)): + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Font bold differs:") + debug_logger.debug(f" Bold1: {run1.font.bold} vs Bold2: {run2.font.bold}") + debug_logger.debug(f" Text: '{run1.text}' vs '{run2.text}'") + return 0 if run1.font.italic != run2.font.italic and examine_font_italic: - if run1.font.italic is not None and run2.font.italic is not None: + # Special handling for None vs False - both mean "not italic" + if not ((run1.font.italic is None or run1.font.italic is False) and + (run2.font.italic is None or run2.font.italic is False)): + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Font italic differs:") + debug_logger.debug(f" Italic1: {run1.font.italic} vs Italic2: {run2.font.italic}") + debug_logger.debug(f" Text: '{run1.text}' vs '{run2.text}'") return 0 if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"): if run1.font.color.rgb != run2.font.color.rgb and examine_color_rgb: + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Font color differs:") + debug_logger.debug(f" Color1: {run1.font.color.rgb} vs Color2: {run2.font.color.rgb}") + debug_logger.debug(f" Text: '{run1.text}' vs '{run2.text}'") return 0 if run1.font.underline != run2.font.underline and examine_font_underline: if run1.font.underline is not None and run2.font.underline is not None: + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Font underline differs:") + debug_logger.debug(f" Underline1: 
{run1.font.underline} vs Underline2: {run2.font.underline}") + debug_logger.debug(f" Text: '{run1.text}' vs '{run2.text}'") return 0 if (run1.font.underline is None and run2.font.underline is True) or (run1.font.underline is True and run2.font.underline is None): + if enable_debug: + debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Font underline differs (None vs True):") + debug_logger.debug(f" Underline1: {run1.font.underline} vs Underline2: {run2.font.underline}") + debug_logger.debug(f" Text: '{run1.text}' vs '{run2.text}'") return 0 if run1.font._element.attrib.get('strike', 'noStrike') != run2.font._element.attrib.get( @@ -382,16 +581,89 @@ def compare_pptx_files(file1_path, file2_path, **options): return bullets - if examine_bullets: - bullets1 = _extract_bullets(run1.part.blob.decode('utf-8')) - bullets2 = _extract_bullets(run2.part.blob.decode('utf-8')) + def _compare_bullets_with_tolerance(bullets1, bullets2): + """Compare bullets with tolerance for minor differences""" + if len(bullets1) != len(bullets2): + return False - # Compare only non-empty bullets - if bullets1 != bullets2: - return 0 + for (lvl1, char1, text1, color1), (lvl2, char2, text2, color2) in zip(bullets1, bullets2): + # Compare text (most important) + if text1 != text2: + return False + + # Compare bullet character + if char1 != char2: + return False + + # Compare level with tolerance (None and '0' are equivalent) + normalized_lvl1 = '0' if lvl1 is None else lvl1 + normalized_lvl2 = '0' if lvl2 is None else lvl2 + if normalized_lvl1 != normalized_lvl2: + return False + + # Color comparison is more lenient - we don't fail on color differences + # since they might be due to theme or formatting differences + # if color1 != color2: + # return False + + return True + + if examine_bullets: + try: + bullets1 = _extract_bullets(run1.part.blob.decode('utf-8')) + bullets2 = _extract_bullets(run2.part.blob.decode('utf-8')) + + # Compare bullets with tolerance 
for minor differences + if not _compare_bullets_with_tolerance(bullets1, bullets2): + return 0 + except: + # If bullet extraction fails, skip bullet comparison + pass # fixme: Actually there are more properties to be compared, we can add them later via parsing the xml data + # Additional check: compare all text shapes including those in GROUPs + if examine_alignment and len(text_shapes1) == len(text_shapes2): + for idx, (tshape1, tshape2) in enumerate(zip(text_shapes1, text_shapes2)): + if enable_debug: + debug_logger.debug(f" Additional text shape check {idx+1}: '{tshape1.text.strip()[:30]}' vs '{tshape2.text.strip()[:30]}'") + + # Compare text content + if tshape1.text.strip() != tshape2.text.strip() and examine_text: + if enable_debug: + debug_logger.debug(f" MISMATCH: Text differs - '{tshape1.text.strip()}' vs '{tshape2.text.strip()}'") + return 0 + + # Compare alignment of each paragraph + for para_idx, (para1, para2) in enumerate(zip(tshape1.text_frame.paragraphs, tshape2.text_frame.paragraphs)): + from pptx.enum.text import PP_ALIGN + align1 = para1.alignment + align2 = para2.alignment + + if enable_debug: + align1_name = "None" if align1 is None else getattr(align1, 'name', str(align1)) + align2_name = "None" if align2 is None else getattr(align2, 'name', str(align2)) + debug_logger.debug(f" Para {para_idx+1}: Alignment '{align1_name}' vs '{align2_name}'") + + # Convert None to LEFT for comparison + if align1 is None: + align1 = PP_ALIGN.LEFT + if align2 is None: + align2 = PP_ALIGN.LEFT + + if align1 != align2: + if enable_debug: + align1_final = getattr(align1, 'name', str(align1)) + align2_final = getattr(align2, 'name', str(align2)) + debug_logger.debug(f" MISMATCH: Alignment differs - '{align1_final}' vs '{align2_final}'") + return 0 + elif len(text_shapes1) != len(text_shapes2): + if enable_debug: + debug_logger.debug(f"MISMATCH: Different number of text shapes - {len(text_shapes1)} vs {len(text_shapes2)}") + return 0 + + if enable_debug: + 
debug_logger.debug(f"=== COMPARISON SUCCESSFUL - Files match ===") return 1 diff --git a/evaluation_examples/examples/libreoffice_impress/04578141-1d42-4146-b9cf-6fab4ce5fd74.json b/evaluation_examples/examples/libreoffice_impress/04578141-1d42-4146-b9cf-6fab4ce5fd74.json index dd46301..22b5e1a 100644 --- a/evaluation_examples/examples/libreoffice_impress/04578141-1d42-4146-b9cf-6fab4ce5fd74.json +++ b/evaluation_examples/examples/libreoffice_impress/04578141-1d42-4146-b9cf-6fab4ce5fd74.json @@ -1,7 +1,7 @@ { "id": "04578141-1d42-4146-b9cf-6fab4ce5fd74", "snapshot": "libreoffice_impress", - "instruction":"Color the first three textboxes on slide 1 yellow, red, and green, respectively, in top-to-bottom order. Use exactly these colors—no variations (e.g., no dark red, light green, etc.).", + "instruction": "Color the first three textboxes on slide 1 yellow, red, and green, respectively, in top-to-bottom order. Use exactly these colors—no variations (e.g., no dark red, light green, etc.).", "source": "https://arxiv.org/pdf/2311.01767.pdf", "config": [ { @@ -64,18 +64,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/04578141-1d42-4146-b9cf-6fab4ce5fd74/45_2_Gold.pptx", - "dest": "45_2_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/45_2.pptx", - "dest": "45_2.pptx" - }, - "options": {} + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/04578141-1d42-4146-b9cf-6fab4ce5fd74/45_2_Gold.pptx", + "dest": "45_2_Gold.pptx" + }, + { + "type": "cloud_file", + "path": 
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/04578141-1d42-4146-b9cf-6fab4ce5fd74/45_2_Gold_all_fonts.pptx", + "dest": "45_2_Gold_All_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/45_2.pptx", + "dest": "45_2.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/45_2.pptx", + "dest": "45_2.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/05dd4c1d-c489-4c85-8389-a7836c4f0567.json b/evaluation_examples/examples/libreoffice_impress/05dd4c1d-c489-4c85-8389-a7836c4f0567.json index dd6e15c..9a1b767 100644 --- a/evaluation_examples/examples/libreoffice_impress/05dd4c1d-c489-4c85-8389-a7836c4f0567.json +++ b/evaluation_examples/examples/libreoffice_impress/05dd4c1d-c489-4c85-8389-a7836c4f0567.json @@ -58,18 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { "type": "cloud_file", "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/05dd4c1d-c489-4c85-8389-a7836c4f0567/38_1_Gold.pptx", "dest": "38_1_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/38_1.pptx", - "dest": "38_1.pptx" - }, - "options": {} + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/05dd4c1d-c489-4c85-8389-a7836c4f0567/38_1_Gold_all_fonts.pptx", + "dest": "38_1_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/38_1.pptx", + "dest": "38_1.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/38_1.pptx", + "dest": "38_1.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git 
a/evaluation_examples/examples/libreoffice_impress/08aced46-45a2-48d7-993b-ed3fb5b32302.json b/evaluation_examples/examples/libreoffice_impress/08aced46-45a2-48d7-993b-ed3fb5b32302.json index 2ac67e8..b166591 100644 --- a/evaluation_examples/examples/libreoffice_impress/08aced46-45a2-48d7-993b-ed3fb5b32302.json +++ b/evaluation_examples/examples/libreoffice_impress/08aced46-45a2-48d7-993b-ed3fb5b32302.json @@ -58,18 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/08aced46-45a2-48d7-993b-ed3fb5b32302/22_6_Gold.pptx", - "dest": "22_6_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/22_6.pptx", - "dest": "22_6.pptx" - }, - "options": {} + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/08aced46-45a2-48d7-993b-ed3fb5b32302/22_6_Gold.pptx", + "dest": "22_6_Gold.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/08aced46-45a2-48d7-993b-ed3fb5b32302/22_6_Gold2.pptx", + "dest": "22_6_Gold2.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/22_6.pptx", + "dest": "22_6.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/22_6.pptx", + "dest": "22_6.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/3161d64e-3120-47b4-aaad-6a764a92493b.json b/evaluation_examples/examples/libreoffice_impress/3161d64e-3120-47b4-aaad-6a764a92493b.json index 9d2da5a..14d6462 100644 --- a/evaluation_examples/examples/libreoffice_impress/3161d64e-3120-47b4-aaad-6a764a92493b.json +++ 
b/evaluation_examples/examples/libreoffice_impress/3161d64e-3120-47b4-aaad-6a764a92493b.json @@ -70,8 +70,7 @@ "dest": "45_1.pptx" }, "options": { - "examine_shape": false, - "examine_shape_for_shift_size": true + "examine_shape": false } }, "proxy": false diff --git a/evaluation_examples/examples/libreoffice_impress/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a.json b/evaluation_examples/examples/libreoffice_impress/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a.json index 42b3faa..613ea67 100644 --- a/evaluation_examples/examples/libreoffice_impress/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a.json +++ b/evaluation_examples/examples/libreoffice_impress/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a.json @@ -58,20 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a/4_1_Gold.pptx", - "dest": "4_1_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/4_1.pptx", - "dest": "4_1.pptx" - }, - "options": { - "examine_shape": false - } + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a/4_1_Gold.pptx", + "dest": "4_1_Gold.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/4ed5abd0-8b5d-47bd-839f-cacfa15ca37a/4_1_Gold_all_fonts.pptx", + "dest": "4_1_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/4_1.pptx", + "dest": "4_1.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/4_1.pptx", + "dest": "4_1.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git 
a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json index 2e9009a..9080319 100644 --- a/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json +++ b/evaluation_examples/examples/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54.json @@ -74,17 +74,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54/New_Club_Spring_2018_Training_with_strike.data", - "dest": "New_Club_Spring_2018_Training_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/New_Club_Spring_2018_Training.pptx", - "dest": "New_Club_Spring_2018_Training.pptx" - } + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54/New_Club_Spring_2018_Training_with_strike.data", + "dest": "New_Club_Spring_2018_Training_Gold.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/550ce7e7-747b-495f-b122-acdc4d0b8e54/New_Club_Spring_2018_Training_Gold_all_fonts.pptx", + "dest": "New_Club_Spring_2018_Training_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/New_Club_Spring_2018_Training.pptx", + "dest": "New_Club_Spring_2018_Training.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/New_Club_Spring_2018_Training.pptx", + "dest": "New_Club_Spring_2018_Training.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git 
a/evaluation_examples/examples/libreoffice_impress/57667013-ea97-417c-9dce-2713091e6e2a.json b/evaluation_examples/examples/libreoffice_impress/57667013-ea97-417c-9dce-2713091e6e2a.json index aff925c..812bdda 100644 --- a/evaluation_examples/examples/libreoffice_impress/57667013-ea97-417c-9dce-2713091e6e2a.json +++ b/evaluation_examples/examples/libreoffice_impress/57667013-ea97-417c-9dce-2713091e6e2a.json @@ -58,18 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/57667013-ea97-417c-9dce-2713091e6e2a/1_2_Gold.pptx", - "dest": "1_2_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/1_2.pptx", - "dest": "1_2.pptx" - }, - "options": {} + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/57667013-ea97-417c-9dce-2713091e6e2a/1_2_Gold.pptx", + "dest": "1_2_Gold.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/57667013-ea97-417c-9dce-2713091e6e2a/1_2_Gold_all_fonts.pptx", + "dest": "1_2_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/1_2.pptx", + "dest": "1_2.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/1_2.pptx", + "dest": "1_2.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1.json b/evaluation_examples/examples/libreoffice_impress/5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1.json index 84f6a3a..a22e3ea 100644 --- a/evaluation_examples/examples/libreoffice_impress/5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1.json +++ 
b/evaluation_examples/examples/libreoffice_impress/5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1.json @@ -58,18 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1/39_2_Gold.pptx", - "dest": "39_2_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/39_2.pptx", - "dest": "39_2.pptx" - }, - "options": {} + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1/39_2_Gold.pptx", + "dest": "39_2_Gold.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1/39_2_Gold_all_fonts.pptx", + "dest": "39_2_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/39_2.pptx", + "dest": "39_2.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/39_2.pptx", + "dest": "39_2.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/5cfb9197-e72b-454b-900e-c06b0c802b40.json b/evaluation_examples/examples/libreoffice_impress/5cfb9197-e72b-454b-900e-c06b0c802b40.json index f6446a1..74be16d 100644 --- a/evaluation_examples/examples/libreoffice_impress/5cfb9197-e72b-454b-900e-c06b0c802b40.json +++ b/evaluation_examples/examples/libreoffice_impress/5cfb9197-e72b-454b-900e-c06b0c802b40.json @@ -58,18 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { "type": "cloud_file", "path": 
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/5cfb9197-e72b-454b-900e-c06b0c802b40/33_1_Gold.pptx", "dest": "33_1_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/33_1.pptx", - "dest": "33_1.pptx" - }, - "options": {} + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/5cfb9197-e72b-454b-900e-c06b0c802b40/33_1_Gold_all_fonts.pptx", + "dest": "33_1_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/33_1.pptx", + "dest": "33_1.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/33_1.pptx", + "dest": "33_1.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/7ae48c60-f143-4119-b659-15b8f485eb9a.json b/evaluation_examples/examples/libreoffice_impress/7ae48c60-f143-4119-b659-15b8f485eb9a.json index 6aa13a4..2a62d3d 100644 --- a/evaluation_examples/examples/libreoffice_impress/7ae48c60-f143-4119-b659-15b8f485eb9a.json +++ b/evaluation_examples/examples/libreoffice_impress/7ae48c60-f143-4119-b659-15b8f485eb9a.json @@ -1,7 +1,7 @@ { "id": "7ae48c60-f143-4119-b659-15b8f485eb9a", "snapshot": "libreoffice_impress", - "instruction": "Change picture's height to 20,30,25cm on slide 3,4,6 respectively.", + "instruction": "Change picture's height to 20, 30, 25cm on slide 3, 4, 6 respectively.", "hint": "The input unit is cm", "source": "https://arxiv.org/pdf/2311.01767.pdf", "config": [ @@ -28,6 +28,7 @@ "libreoffice_impress" ], "evaluator": { + "_config_note": "CRITICAL: examine_shape must be False for examine_modify_height to work. 
Due to execution order, examine_shape check happens before examine_modify_height, so any shape dimension mismatch will cause immediate failure before examine_modify_height logic is reached.", "postconfig": [ { "type": "activate_window", @@ -59,21 +60,47 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/7ae48c60-f143-4119-b659-15b8f485eb9a/30_1_Gold.pptx", - "dest": "30_1_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/30_1.pptx", - "dest": "30_1.pptx" - }, - "options": { - "examine_shape": false, - "examine_modify_height": true - } + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/7ae48c60-f143-4119-b659-15b8f485eb9a/30_1_Gold.pptx", + "dest": "30_1_Gold.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/7ae48c60-f143-4119-b659-15b8f485eb9a/30_1_Gold_all_fonts.pptx", + "dest": "30_1_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/30_1.pptx", + "dest": "30_1.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/30_1.pptx", + "dest": "30_1.pptx" + } + ], + "options": [ + { + "_note": "examine_shape=False is REQUIRED for examine_modify_height to work - see slides.py for execution order details", + "examine_modify_height": true, + "examine_shape": false + }, + { + "_note": "examine_shape=False is REQUIRED for examine_modify_height to work - see slides.py for execution order details", + "examine_modify_height": true, + "examine_shape": false + } + ] }, "proxy": false } \ No newline at end of file diff --git 
a/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json b/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json index 1f0f2fa..8823579 100644 --- a/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json +++ b/evaluation_examples/examples/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab.json @@ -1,7 +1,7 @@ { "id": "9ec204e4-f0a3-42f8-8458-b772a6797cab", "snapshot": "libreoffice_impress", - "instruction":"Please duplicate the last two slides and insert the copies in alternating order, so the sequence becomes: original slide A, original slide B, then duplicated slide A, duplicated slide B.", + "instruction": "Please duplicate the last two slides and insert the copies in alternating order, so the sequence becomes: original slide A, original slide B, then duplicated slide A, duplicated slide B.", "source": "https://www.tiktok.com/@lil.d1rt_/video/7247574148887629083", "config": [ { @@ -58,17 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab/MLA_Workshop_061X_Works_Cited_Gold.pptx", - "dest": "MLA_Workshop_061X_Works_Cited_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/MLA_Workshop_061X_Works_Cited.pptx", - "dest": "MLA_Workshop_061X_Works_Cited.pptx" - } + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab/MLA_Workshop_061X_Works_Cited_Gold.pptx", + "dest": "MLA_Workshop_061X_Works_Cited_Gold.pptx" + }, + { + "type": "cloud_file", + "path": 
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/9ec204e4-f0a3-42f8-8458-b772a6797cab/MLA_Workshop_061X_Works_Cited_Gold_all_fonts.pptx", + "dest": "MLA_Workshop_061X_Works_Cited_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/MLA_Workshop_061X_Works_Cited.pptx", + "dest": "MLA_Workshop_061X_Works_Cited.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/MLA_Workshop_061X_Works_Cited.pptx", + "dest": "MLA_Workshop_061X_Works_Cited.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/a53f80cd-4a90-4490-8310-097b011433f6.json b/evaluation_examples/examples/libreoffice_impress/a53f80cd-4a90-4490-8310-097b011433f6.json index 8845347..5ee3783 100644 --- a/evaluation_examples/examples/libreoffice_impress/a53f80cd-4a90-4490-8310-097b011433f6.json +++ b/evaluation_examples/examples/libreoffice_impress/a53f80cd-4a90-4490-8310-097b011433f6.json @@ -58,18 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/a53f80cd-4a90-4490-8310-097b011433f6/21_0_Gold.pptx", - "dest": "21_0_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/21_0.pptx", - "dest": "21_0.pptx" - }, - "options": {} + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/a53f80cd-4a90-4490-8310-097b011433f6/21_0_Gold.pptx", + "dest": "21_0_Gold.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/a53f80cd-4a90-4490-8310-097b011433f6/21_0_Gold_all_fonts_3.pptx", + "dest": 
"21_0_Gold_all_fonts_3.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/21_0.pptx", + "dest": "21_0.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/21_0.pptx", + "dest": "21_0.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/b8adbc24-cef2-4b15-99d5-ecbe7ff445eb.json b/evaluation_examples/examples/libreoffice_impress/b8adbc24-cef2-4b15-99d5-ecbe7ff445eb.json index 0350fec..5472554 100644 --- a/evaluation_examples/examples/libreoffice_impress/b8adbc24-cef2-4b15-99d5-ecbe7ff445eb.json +++ b/evaluation_examples/examples/libreoffice_impress/b8adbc24-cef2-4b15-99d5-ecbe7ff445eb.json @@ -72,7 +72,9 @@ "options": { "examine_alignment": false, "examine_font_name": false, - "examine_shape": false + "examine_shape": false, + "examine_font_size": false + } }, "proxy": false diff --git a/evaluation_examples/examples/libreoffice_impress/bf4e9888-f10f-47af-8dba-76413038b73c.json b/evaluation_examples/examples/libreoffice_impress/bf4e9888-f10f-47af-8dba-76413038b73c.json index e243bc0..63ac3d6 100644 --- a/evaluation_examples/examples/libreoffice_impress/bf4e9888-f10f-47af-8dba-76413038b73c.json +++ b/evaluation_examples/examples/libreoffice_impress/bf4e9888-f10f-47af-8dba-76413038b73c.json @@ -1,7 +1,7 @@ { "id": "bf4e9888-f10f-47af-8dba-76413038b73c", "snapshot": "libreoffice_impress", - "instruction": "I have a series of .png images named pic1.png to pic6.png saved on the Desktop. I want to create a new presentation with six blank slides, and insert one image per slide in order—pic1.png on the first slide, pic2.png on the second, and so on—following the numerical order. The goal is to prepare a presentation suitable for continuous playback in a kiosk or multimedia show. Could you help me with that?", + "instruction": "I have a series of .png images named pic1.png to pic6.png saved on the Desktop. 
I want to create a new presentation with six blank slides(which means no textbox or any other stuff on the page), and insert one image per slide in order—pic1.png on the first slide, pic2.png on the second, and so on—following the numerical order. The goal is to prepare a presentation suitable for continuous playback in a kiosk or multimedia show. Could you help me with that?", "source": "https://help.libreoffice.org/6.4/en-US/text/simpress/guide/photo_album.html?DbPAR=IMPRESS", "config": [ { diff --git a/evaluation_examples/examples/libreoffice_impress/e4ef0baf-4b52-4590-a47e-d4d464cca2d7.json b/evaluation_examples/examples/libreoffice_impress/e4ef0baf-4b52-4590-a47e-d4d464cca2d7.json index 2f4c6e9..7b3678c 100644 --- a/evaluation_examples/examples/libreoffice_impress/e4ef0baf-4b52-4590-a47e-d4d464cca2d7.json +++ b/evaluation_examples/examples/libreoffice_impress/e4ef0baf-4b52-4590-a47e-d4d464cca2d7.json @@ -58,18 +58,35 @@ } } ], - "func": "compare_pptx_files", - "expected": { - "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/e4ef0baf-4b52-4590-a47e-d4d464cca2d7/42_2_Gold.pptx", - "dest": "42_2_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/42_2.pptx", - "dest": "42_2.pptx" - }, - "options": {} + "func": [ + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/e4ef0baf-4b52-4590-a47e-d4d464cca2d7/42_2_Gold.pptx", + "dest": "42_2_Gold.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/e4ef0baf-4b52-4590-a47e-d4d464cca2d7/42_2_Gold_all_fonts.pptx", + "dest": "42_2_Gold_all_fonts.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/42_2.pptx", + "dest": 
"42_2.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/42_2.pptx", + "dest": "42_2.pptx" + } + ] }, "proxy": false } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_impress/f23acfd2-c485-4b7c-a1e7-d4303ddfe864.json b/evaluation_examples/examples/libreoffice_impress/f23acfd2-c485-4b7c-a1e7-d4303ddfe864.json index 526a75a..9d58fc2 100644 --- a/evaluation_examples/examples/libreoffice_impress/f23acfd2-c485-4b7c-a1e7-d4303ddfe864.json +++ b/evaluation_examples/examples/libreoffice_impress/f23acfd2-c485-4b7c-a1e7-d4303ddfe864.json @@ -58,18 +58,57 @@ } } ], - "func": "compare_pptx_files", - "expected": { + "func": [ + "compare_pptx_files", + "compare_pptx_files", + "compare_pptx_files", + "compare_pptx_files" + ], + "conj": "or", + "expected": [ + { "type": "cloud_file", "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/f23acfd2-c485-4b7c-a1e7-d4303ddfe864/69_4_Gold.pptx", "dest": "69_4_Gold.pptx" - }, - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/69_4.pptx", - "dest": "69_4.pptx" - }, - "options": {} + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/f23acfd2-c485-4b7c-a1e7-d4303ddfe864/69_4_Gold2.pptx", + "dest": "69_4_Gold2.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/f23acfd2-c485-4b7c-a1e7-d4303ddfe864/69_4_Gold_all_fonts_1.pptx", + "dest": "69_4_Gold_all_fonts_1.pptx" + }, + { + "type": "cloud_file", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_impress/f23acfd2-c485-4b7c-a1e7-d4303ddfe864/69_4_Gold_all_fonts_2.pptx", + "dest": "69_4_Gold_all_fonts_2.pptx" + } + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/69_4.pptx", + "dest": "69_4.pptx" + }, + { + "type": 
"vm_file", + "path": "/home/user/Desktop/69_4.pptx", + "dest": "69_4.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/69_4.pptx", + "dest": "69_4.pptx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/69_4.pptx", + "dest": "69_4.pptx" + } + ] }, "proxy": false } \ No newline at end of file