Fix some problems in LibreOffice Writer

This commit is contained in:
rhythmcao
2024-02-02 02:23:25 +08:00
parent beaea399dd
commit 538b9928fe
6 changed files with 61 additions and 12 deletions

View File

@@ -43,6 +43,7 @@ from .docs import (
compare_highlighted_text,
is_first_line_centered,
check_file_exists,
check_tabstops,
compare_contains_image
)
from .general import (

View File

@@ -6,11 +6,13 @@ import zipfile
from typing import List, Dict, Any
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
from docx.shared import RGBColor
from odf.opendocument import load
from odf.text import P
from odf.text import Span
from skimage.color import deltaE_ciede2000
from skimage.color import rgb2lab
logger = logging.getLogger("desktopenv.metric.docs")
@@ -141,7 +143,7 @@ def compare_docx_tables(docx_file1, docx_file2):
# Compare each cell
for i in range(len(table1.rows)):
for j in range(len(table1.columns)):
if table1.cell(i, j).text != table2.cell(i, j).text:
if table1.cell(i, j).text.strip() != table2.cell(i, j).text.strip():
return 0
return 1
@@ -234,6 +236,40 @@ def check_file_exists(directory, filename):
return 1 if os.path.isfile(file_path) else 0
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
    """Score how closely the paragraph tab stops of two .docx files agree.

    Only paragraphs containing non-whitespace text are considered; they are
    compared pairwise in document order.

    Args:
        docx_file1: First document, passed to ``Document()``. The optional
            word-count check below is applied to this document's paragraphs.
        docx_file2: Second document, passed to ``Document()``. Its first
            section supplies the page width and margins used to normalise
            tab-position differences.
        **kwargs:
            word_number_split_by_tabstop: Optional. When not ``None``, every
                paragraph of the first document must contain exactly this
                many whitespace-separated words in the tab-separated segment
                selected by ``index``.
            index: Which tab-separated segment to inspect (default ``0``).

    Returns:
        ``0.0`` if the paragraph counts differ, the word-count check fails
        (including when ``index`` selects a segment that does not exist), or
        any paragraph pair differs in the number or alignment of its tab
        stops. ``1.0`` if neither document has a non-empty paragraph.
        Otherwise ``1 - mean(per-paragraph position difference / text
        width)``, clamped so it never drops below ``0.0``.
    """
    doc1: Document = Document(docx_file1)
    doc2: Document = Document(docx_file2)
    para1 = [p for p in doc1.paragraphs if p.text.strip()]
    para2 = [p for p in doc2.paragraphs if p.text.strip()]
    if len(para1) != len(para2):
        return 0.0
    if not para1:
        # Nothing to compare, so nothing differs; also avoids dividing by
        # len(para1) == 0 at the end.
        return 1.0

    required_words = kwargs.get('word_number_split_by_tabstop')
    if required_words is not None:
        index = kwargs.get('index', 0)
        for paragraph in para1:
            segments = paragraph.text.split('\t')
            # str.split never returns an empty list, so the only way the
            # requested segment can be missing is an out-of-range index.
            try:
                segment = segments[index]
            except IndexError:
                return 0.0
            words = [w for w in re.split(r'\s', segment) if w]
            if len(words) != required_words:
                return 0.0

    section = doc2.sections[0]
    paragraph_width = section.page_width - section.left_margin - section.right_margin

    def _significant_tab_stops(paragraph):
        # Drop CLEAR tab stops and the default left-aligned stop at position 0.
        return [
            ts for ts in paragraph.paragraph_format.tab_stops
            if not (
                ts.alignment == WD_TAB_ALIGNMENT.CLEAR
                or (ts.alignment == WD_TAB_ALIGNMENT.LEFT and ts.position == 0)
            )
        ]

    penalty = 0.0
    for p1, p2 in zip(para1, para2):
        tabs1 = _significant_tab_stops(p1)
        tabs2 = _significant_tab_stops(p2)
        if len(tabs1) != len(tabs2):
            return 0.0
        if any(t1.alignment != t2.alignment for t1, t2 in zip(tabs1, tabs2)):
            return 0.0
        offset = sum(abs(t1.position - t2.position) for t1, t2 in zip(tabs1, tabs2))
        penalty += offset / paragraph_width

    # Large offsets over several tab stops can push the mean penalty above 1;
    # keep the score from going negative.
    return max(0.0, 1 - penalty / len(para1))
def compare_contains_image(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -258,10 +294,18 @@ def compare_contains_image(docx_file1, docx_file2):
# print(find_default_font("Ani", config_path))
def evaluate_colored_words_in_tables(file_path1, file_path2):
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
threshold = kwargs.get('threshold', 3.5)
def _calculate_color_difference(rgb1, rgb2):
    """Return the CIEDE2000 colour difference between two RGB triples.

    Each argument is indexed as ``[0]``, ``[1]``, ``[2]`` and every channel
    is divided by 255 before conversion to Lab with ``rgb2lab``.

    If either colour is ``None`` the difference is reported as ``inf``. The
    caller passes ``run.font.color.rgb``, which python-docx leaves as
    ``None`` when a run has no explicit RGB colour; treating that as
    "infinitely far" makes the caller's ``> threshold`` test reject the run
    instead of raising ``TypeError`` on the subscript.
    """
    if rgb1 is None or rgb2 is None:
        return float('inf')
    srgb1 = [channel / 255.0 for channel in rgb1[:3]]
    srgb2 = [channel / 255.0 for channel in rgb2[:3]]
    lab1, lab2 = rgb2lab(srgb1), rgb2lab(srgb2)
    return deltaE_ciede2000(lab1, lab2)
for table in document.tables:
# Iterate through rows and cells in the table
@@ -273,9 +317,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
if word:
first_letter = word[0].lower()
if first_letter in 'aeiou' and run.font.color.rgb != RGBColor(255, 0, 0):
if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(255, 0, 0)) > threshold:
return 0 # Vowel-colored words should be red
elif first_letter not in 'aeiou' and run.font.color.rgb != RGBColor(0, 0, 255):
elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(0, 0, 255)) > threshold:
return 0 # Non-vowel-colored words should be blue
return 1 # All words in tables are correctly colored