diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index def8bf0..57e5d4a 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -55,7 +55,8 @@ def contains_page_break(docx_file): return 0 -def compare_docx_files(file1, file2, ignore_blanks=True): +def compare_docx_files(file1, file2, **options): + ignore_blanks = options.get('ignore_blanks', True) def get_paragraph_texts_odt(document): paragraphs = document.getElementsByType(P) paragraph_texts = [] @@ -250,11 +251,12 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float: splits = p1.text.split('\t') if len(splits) == 0: return .0 words = list(filter(lambda x: x.strip(), re.split(r'\s', splits[index]))) - if len(words) != number: return .0 - + if len(words) != number: return .0 + section = doc2.sections[0] paragraph_width = section.page_width - section.left_margin - section.right_margin - ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0) + ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or ( + x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0) minus = .0 for p1, p2 in zip(para1, para2): # filter CLEAR tabstop and default left-0 tabstop @@ -317,9 +319,12 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs): if word: first_letter = word[0].lower() - if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(255, 0, 0)) > threshold: + if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, + RGBColor(255, 0, 0)) > threshold: return 0 # Vowel-colored words should be red - elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(0, 0, 255)) > threshold: + elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, + RGBColor(0, 0, + 255)) > threshold: return 0 # Non-vowel-colored words should be blue return 1 # All words in tables are correctly colored @@ -534,3 +539,11 @@ def compare_highlighted_text(file1, file2): else: return 0 +if __name__ == '__main__': + print( + compare_docx_files( + r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\88fe4b2d-3040-4c70-9a70-546a47764b48\CCCH9003_Tutorial_guidelines.docx", + r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\88fe4b2d-3040-4c70-9a70-546a47764b48\CCCH9003_Tutorial_guidelines_Gold.docx", + ignore_blanks=False + ) + ) diff --git a/evaluation_examples/examples/libreoffice_writer/0b17a146-2934-46c7-8727-73ff6b6483e8.json b/evaluation_examples/examples/libreoffice_writer/0b17a146-2934-46c7-8727-73ff6b6483e8.json index d091c47..048f737 100644 --- a/evaluation_examples/examples/libreoffice_writer/0b17a146-2934-46c7-8727-73ff6b6483e8.json +++ b/evaluation_examples/examples/libreoffice_writer/0b17a146-2934-46c7-8727-73ff6b6483e8.json @@ -52,16 +52,33 @@ } } ], - "func": "compare_docx_files", - "result": { - "type": "vm_file", - "path": "/home/user/Desktop/H2O_Factsheet_WA.docx", - "dest": "H2O_Factsheet_WA.docx" - }, - "expected": { - "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1dM_FSTGDWxSW64VEth_wKMYNkvw0y_tq&export=download&authuser=0&confirm=t&uuid=342f41e2-f48f-41ff-8942-f7dfe5de1dba&at=APZUnTXHfskcX3tvmrSbzCOyQIgb:1704976694506", - "dest": "H2O_Factsheet_WA_Gold.docx" - } + "func": [ + "compare_docx_files", + "compare_subscript_contains" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/H2O_Factsheet_WA.docx", + "dest": "H2O_Factsheet_WA.docx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/H2O_Factsheet_WA.docx", + "dest": "H2O_Factsheet_WA.docx" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1dM_FSTGDWxSW64VEth_wKMYNkvw0y_tq&export=download&authuser=0&confirm=t&uuid=342f41e2-f48f-41ff-8942-f7dfe5de1dba&at=APZUnTXHfskcX3tvmrSbzCOyQIgb:1704976694506", + "dest": "H2O_Factsheet_WA_Gold.docx" + }, + { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1dM_FSTGDWxSW64VEth_wKMYNkvw0y_tq&export=download&authuser=0&confirm=t&uuid=342f41e2-f48f-41ff-8942-f7dfe5de1dba&at=APZUnTXHfskcX3tvmrSbzCOyQIgb:1704976694506", + "dest": "H2O_Factsheet_WA_Gold.docx" + } + ] } } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json b/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json index 5d2a99a..6096b1d 100644 --- a/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json +++ b/evaluation_examples/examples/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48.json @@ -62,6 +62,9 @@ "type": "vm_file", "path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx", "dest": "CCCH9003_Tutorial_guidelines.docx" + }, + "options": { + "ignore_blanks": false } } } \ No newline at end of file