"""Helpers that inspect and compare .pptx presentations via python-pptx."""
from pptx import Presentation
from pptx.util import Inches

# Numeric values of the python-pptx enumeration members the checks rely on.
_SHAPE_TYPE_AUTO_SHAPE = 1  # MSO_SHAPE_TYPE.AUTO_SHAPE
_SHAPE_TYPE_PICTURE = 13  # MSO_SHAPE_TYPE.PICTURE
_SHAPE_TYPE_MEDIA = 16  # MSO_SHAPE_TYPE.MEDIA
_MEDIA_TYPE_MOVIE = 3  # PP_MEDIA_TYPE.MOVIE

# Largest accepted deviation for size/position checks.
_POSITION_TOLERANCE = Inches(0.1)

# Values of the DrawingML ``strike`` attribute that actually strike the text.
_STRIKE_ON_VALUES = ("sngStrike", "dblStrike")


def _pictures_on(slide):
    """Return the shapes on *slide* whose shape type is PICTURE."""
    return [
        shape for shape in slide.shapes
        if shape.shape_type == _SHAPE_TYPE_PICTURE
    ]


def _explicit_rgb(color):
    """Return ``color.rgb``, or None when the color exposes no RGB value."""
    if color is None:
        return None
    try:
        return color.rgb
    except AttributeError:
        # python-pptx raises AttributeError for colors without an explicit
        # RGB value (e.g. unset or theme colors).
        return None


def check_image_stretch_and_center(modified_ppt, original_ppt):
    """Check that the first picture was stretched to the slide and centered.

    The first picture on the first slide of *original_ppt* is located on the
    first slide of *modified_ppt* by comparing image bytes. The check passes
    when that picture's width/height are within 0.1 inch of the original
    presentation's slide size and its left/top offsets are within 0.1 inch of
    the centered position.

    Args:
        modified_ppt: Path (or file-like object) of the edited presentation.
        original_ppt: Path (or file-like object) of the source presentation.

    Returns:
        True if the picture is stretched and centered, False otherwise --
        including when the original has no picture or the picture cannot be
        found in the modified presentation.
    """
    # fixme: this func is overfit to this example libreoffice_impress
    original_pres = Presentation(original_ppt)
    modified_pres = Presentation(modified_ppt)

    original_images = _pictures_on(original_pres.slides[0])
    if not original_images:
        return False
    reference_blob = original_images[0].image.blob

    # When several pictures share the same bytes, the last one is used.
    matched = None
    for candidate in _pictures_on(modified_pres.slides[0]):
        if candidate.image.blob == reference_blob:
            matched = candidate
    if matched is None:
        return False

    # Slide dimensions are taken from the original presentation.
    slide_width = original_pres.slide_width
    slide_height = original_pres.slide_height
    deviations = (
        matched.width - slide_width,
        matched.height - slide_height,
        matched.left - (slide_width - matched.width) / 2,
        matched.top - (slide_height - matched.height) / 2,
    )
    return all(abs(delta) <= _POSITION_TOLERANCE for delta in deviations)


def is_red_color(color):
    """Return True when *color* has the explicit RGB value (255, 0, 0).

    Args:
        color: A python-pptx color object, or None.

    Returns:
        False for None or for a color without an explicit RGB value.
    """
    rgb = _explicit_rgb(color)
    if rgb is None:
        return False
    print(rgb)
    return rgb == (255, 0, 0)


def get_master_placeholder_color(prs):
    """Return the font color of the first matching slide-master placeholder.

    Scans every slide master of *prs* for a placeholder that has a text frame
    and whose text equals the empty string, and returns the font color of
    that placeholder's first paragraph.

    Args:
        prs: A loaded ``Presentation``.

    Returns:
        The paragraph's font color object, or None if nothing matches.
    """
    # NOTE(review): the text is compared against "" here; confirm this is the
    # intended marker for the slide-number placeholder.
    for master in prs.slide_masters:
        for placeholder in master.placeholders:
            if placeholder.has_text_frame and placeholder.text == "":
                paragraphs = placeholder.text_frame.paragraphs
                if paragraphs:
                    return paragraphs[0].font.color
    return None


def check_slide_numbers_color(pptx_file_path):
    """Check whether the slide-number placeholder color is red.

    Looks for the first shape whose text consists only of digits; once one is
    found, the verdict is based on the color reported by
    ``get_master_placeholder_color``.

    Args:
        pptx_file_path: Path of the presentation to inspect.

    Returns:
        1 if a numeric shape exists and the master placeholder color is red,
        otherwise 0 (also when no numeric shape exists at all).
    """
    presentation = Presentation(pptx_file_path)

    for slide in presentation.slides:
        for shape in slide.shapes:
            # Only shapes exposing text can hold a slide number.
            if hasattr(shape, "text") and shape.text.isdigit():
                # "SlidePlaceholder" is the name of the placeholder in the
                # master slide
                font_color = get_master_placeholder_color(presentation)
                print(font_color)
                return 1 if font_color is not None and is_red_color(font_color) else 0
    return 0


def compare_pptx_files(file1_path, file2_path, **options):
    """Compare two presentations slide by slide.

    Compared properties: slide count, shape count per slide, each shape's
    position and size, shape text, and per-run text, font name, size, bold,
    italic and explicit RGB color.

    Args:
        file1_path: Path of the first presentation.
        file2_path: Path of the second presentation.
        **options: Accepted for interface compatibility; currently unused.

    Returns:
        True if every compared property matches, False otherwise.
    """
    # todo: not strictly match since not all information is compared because
    # we cannot get the info through pptx
    prs1 = Presentation(file1_path)
    prs2 = Presentation(file2_path)

    if len(prs1.slides) != len(prs2.slides):
        return False

    for slide1, slide2 in zip(prs1.slides, prs2.slides):
        # zip() would silently drop surplus shapes, so compare counts first.
        if len(slide1.shapes) != len(slide2.shapes):
            return False

        for shape1, shape2 in zip(slide1.shapes, slide2.shapes):
            geometry1 = (shape1.left, shape1.top, shape1.width, shape1.height)
            geometry2 = (shape2.left, shape2.top, shape2.width, shape2.height)
            if geometry1 != geometry2:
                return False

            if not (hasattr(shape1, "text") and hasattr(shape2, "text")):
                continue
            if shape1.text != shape2.text:
                return False

            paragraph_pairs = zip(
                shape1.text_frame.paragraphs, shape2.text_frame.paragraphs
            )
            for para1, para2 in paragraph_pairs:
                for run1, run2 in zip(para1.runs, para2.runs):
                    if run1.text != run2.text:
                        return False
                    if _run_font_signature(run1) != _run_font_signature(run2):
                        return False

    return True


def _run_font_signature(run):
    """Return the tuple of font properties used to compare two runs."""
    font = run.font
    # Colors without an explicit RGB value are represented as None so that
    # comparing them does not raise.
    return (
        font.name,
        font.size,
        font.bold,
        font.italic,
        _explicit_rgb(font.color),
    )


def check_strikethrough(pptx_path, rules):
    """Check that the first run of each selected paragraph is struck through.

    Args:
        pptx_path: Path of the presentation to inspect.
        rules: Mapping with the keys ``"slide_index_s"``, ``"shape_index_s"``
            and ``"paragraph_index_s"``; each holds an iterable of indices.
            Every combination of slide, shape and paragraph index is checked.

    Returns:
        True if every selected paragraph's first run carries a single or
        double strikethrough; False otherwise, including when a selected
        paragraph has no runs.

    Raises:
        KeyError: If *rules* lacks one of the required keys.
        IndexError: If a slide, shape or paragraph index is out of range.
    """
    presentation = Presentation(pptx_path)

    slide_index_s = rules["slide_index_s"]
    shape_index_s = rules["shape_index_s"]
    paragraph_index_s = rules["paragraph_index_s"]

    for slide_index in slide_index_s:
        slide = presentation.slides[slide_index]

        for shape_index in shape_index_s:
            paragraphs = slide.shapes[shape_index].text_frame.paragraphs

            for paragraph_index in paragraph_index_s:
                runs = paragraphs[paragraph_index].runs
                if not runs:
                    return False
                # The attribute may be present but set to "noStrike", so its
                # value has to be checked, not merely its presence.
                strike = runs[0].font._element.attrib.get("strike")
                if strike not in _STRIKE_ON_VALUES:
                    return False

    return True


def has_audio_on_page(slide):
    """Return True if *slide* holds at least one shape of shape type 13.

    NOTE(review): 13 is the PICTURE shape type; presumably inserted audio
    shows up as a picture shape in the files being checked -- confirm.
    """
    return any(
        shape.shape_type == _SHAPE_TYPE_PICTURE for shape in slide.shapes
    )


def check_for_audio(prs):
    """Return 1 if any slide of the presentation at *prs* passes
    ``has_audio_on_page``, else 0.

    Args:
        prs: Path of the presentation to inspect.
    """
    presentation = Presentation(prs)
    return 1 if any(has_audio_on_page(slide) for slide in presentation.slides) else 0


def check_formula_shape(prs, slide_index=13):
    """Return 1 if the chosen slide has an auto shape with a text frame.

    Args:
        prs: Path of the presentation to inspect.
        slide_index: Zero-based index of the slide to look at; defaults to
            13, the value this check was originally written for.

    Returns:
        1 if such a shape exists on the slide, else 0.

    Raises:
        IndexError: If the presentation has no slide at *slide_index*.
    """
    presentation = Presentation(prs)
    slide = presentation.slides[slide_index]

    for shape in slide.shapes:
        if shape.has_text_frame and shape.shape_type == _SHAPE_TYPE_AUTO_SHAPE:
            return 1
    return 0


def check_slide_orientation_Portrait(pptx_path):
    """Return 1 if the slide width is smaller than the slide height, else 0.

    Args:
        pptx_path: Path of the presentation to inspect.
    """
    presentation = Presentation(pptx_path)
    is_portrait = presentation.slide_width < presentation.slide_height
    return 1 if is_portrait else 0


def contains_mp4_video(pptx_path):
    """Return 1 if any slide holds a media shape whose media type is MOVIE.

    Only the shape type and media type are inspected; the container format of
    the embedded file is not examined.

    Args:
        pptx_path: Path of the presentation to inspect.
    """
    prs = Presentation(pptx_path)

    for slide in prs.slides:
        for shape in slide.shapes:
            if (shape.shape_type == _SHAPE_TYPE_MEDIA
                    and shape.media_type == _MEDIA_TYPE_MOVIE):
                return 1
    return 0


if __name__ == "__main__":
    # Ad-hoc inspection: dump every shape of a sample presentation.
    path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx"
    presentation = Presentation(path1)

    for i, sl in enumerate(presentation.slides):
        for j, sh in enumerate(sl.shapes):
            print(i, j, sh, sh.name, sh.shape_type, sh.text)