fix some problems in libreoffice writer
This commit is contained in:
@@ -43,6 +43,7 @@ from .docs import (
|
||||
compare_highlighted_text,
|
||||
is_first_line_centered,
|
||||
check_file_exists,
|
||||
check_tabstops,
|
||||
compare_contains_image
|
||||
)
|
||||
from .general import (
|
||||
|
||||
@@ -6,11 +6,13 @@ import zipfile
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from docx import Document
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
|
||||
from docx.shared import RGBColor
|
||||
from odf.opendocument import load
|
||||
from odf.text import P
|
||||
from odf.text import Span
|
||||
from skimage.color import deltaE_ciede2000
|
||||
from skimage.color import rgb2lab
|
||||
|
||||
logger = logging.getLogger("desktopenv.metric.docs")
|
||||
|
||||
@@ -141,7 +143,7 @@ def compare_docx_tables(docx_file1, docx_file2):
|
||||
# Compare each cell
|
||||
for i in range(len(table1.rows)):
|
||||
for j in range(len(table1.columns)):
|
||||
if table1.cell(i, j).text != table2.cell(i, j).text:
|
||||
if table1.cell(i, j).text.strip() != table2.cell(i, j).text.strip():
|
||||
return 0
|
||||
|
||||
return 1
|
||||
@@ -234,6 +236,40 @@ def check_file_exists(directory, filename):
|
||||
return 1 if os.path.isfile(file_path) else 0
|
||||
|
||||
|
||||
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
|
||||
doc1: Document = Document(docx_file1)
|
||||
doc2: Document = Document(docx_file2)
|
||||
para1 = [p for p in doc1.paragraphs if p.text.strip()]
|
||||
para2 = [p for p in doc2.paragraphs if p.text.strip()]
|
||||
if len(para1) != len(para2): return .0
|
||||
|
||||
if kwargs.get('word_number_split_by_tabstop', None) is not None:
|
||||
number = kwargs['word_number_split_by_tabstop']
|
||||
index = kwargs.get('index', 0)
|
||||
for p1 in para1:
|
||||
splits = p1.text.split('\t')
|
||||
if len(splits) == 0: return .0
|
||||
words = list(filter(lambda x: x.strip(), re.split(r'\s', splits[index])))
|
||||
if len(words) != number: return .0
|
||||
|
||||
section = doc2.sections[0]
|
||||
paragraph_width = section.page_width - section.left_margin - section.right_margin
|
||||
ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
|
||||
minus = .0
|
||||
for p1, p2 in zip(para1, para2):
|
||||
# filter CLEAR tabstop and default left-0 tabstop
|
||||
tabs1 = [tst for tst in p1.paragraph_format.tab_stops if not ignore_tabs(tst)]
|
||||
tabs2 = [tst for tst in p2.paragraph_format.tab_stops if not ignore_tabs(tst)]
|
||||
if len(tabs1) != len(tabs2): return .0
|
||||
difference = .0
|
||||
for t1, t2 in zip(tabs1, tabs2):
|
||||
if t1.alignment != t2.alignment: return .0
|
||||
difference += abs(t1.position - t2.position)
|
||||
minus += difference / paragraph_width
|
||||
score = 1 - (minus / len(para1))
|
||||
return score
|
||||
|
||||
|
||||
def compare_contains_image(docx_file1, docx_file2):
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
@@ -258,10 +294,18 @@ def compare_contains_image(docx_file1, docx_file2):
|
||||
# print(find_default_font("Ani", config_path))
|
||||
|
||||
|
||||
def evaluate_colored_words_in_tables(file_path1, file_path2):
|
||||
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
|
||||
if not compare_docx_files(file_path1, file_path2):
|
||||
return 0
|
||||
document = Document(file_path1)
|
||||
threshold = kwargs.get('threshold', 3.5)
|
||||
|
||||
def _calculate_color_difference(rgb1, rgb2):
|
||||
srgb1 = [rgb1[0] / 255.0, rgb1[1] / 255.0, rgb1[2] / 255.0]
|
||||
srgb2 = [rgb2[0] / 255.0, rgb2[1] / 255.0, rgb2[2] / 255.0]
|
||||
lab1, lab2 = rgb2lab(srgb1), rgb2lab(srgb2)
|
||||
delta_e = deltaE_ciede2000(lab1, lab2)
|
||||
return delta_e
|
||||
|
||||
for table in document.tables:
|
||||
# Iterate through rows and cells in the table
|
||||
@@ -273,9 +317,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
|
||||
if word:
|
||||
first_letter = word[0].lower()
|
||||
|
||||
if first_letter in 'aeiou' and run.font.color.rgb != RGBColor(255, 0, 0):
|
||||
if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(255, 0, 0)) > threshold:
|
||||
return 0 # Vowel-colored words should be red
|
||||
elif first_letter not in 'aeiou' and run.font.color.rgb != RGBColor(0, 0, 255):
|
||||
elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(0, 0, 255)) > threshold:
|
||||
return 0 # Non-vowel-colored words should be blue
|
||||
|
||||
return 1 # All words in tables are correctly colored
|
||||
|
||||
Reference in New Issue
Block a user