Merge remote-tracking branch 'origin/main'

This commit is contained in:
Timothyxxx
2024-01-28 15:33:19 +08:00
2 changed files with 27 additions and 15 deletions

View File

@@ -1,6 +1,8 @@
import logging import logging
import os import os
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import zipfile
import re
from typing import List, Dict, Any from typing import List, Dict, Any
from docx import Document from docx import Document
@@ -247,14 +249,24 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
def check_highlighted_words(file_path1, file_path2): def check_highlighted_words(file_path1, file_path2):
if not compare_docx_files(file_path1, file_path2): if not compare_docx_files(file_path1, file_path2):
return 0 return 0
document = Document(file_path1)
# Extract content.xml from the .odt file
extract_dir = file_path1 + "_extracted"
with zipfile.ZipFile(file_path1, 'r') as zip_ref:
zip_ref.extractall(extract_dir)
content_xml_path = os.path.join(extract_dir, 'content.xml')
with open(content_xml_path, 'r') as file:
content_xml = file.read()
for paragraph in document.paragraphs: # Check for yellow highlights in the content.xml
for run in paragraph.runs: yellow_highlight_pattern = re.compile(r'(.{0,50}background-color="#ffff00"[^>]*>.{0,50})')
if run.font.highlight_color is not None: yellow_highlight_matches = yellow_highlight_pattern.findall(content_xml)
return 0 # Highlighted words found
return 1 # No highlighted words found # Return 1 if yellow highlights are NOT found, otherwise 0
if yellow_highlight_matches:
return 0
else:
return 1
def evaluate_strike_through_last_paragraph(file_path1, file_path2): def evaluate_strike_through_last_paragraph(file_path1, file_path2):
@@ -415,4 +427,4 @@ def compare_highlighted_text(file1, file2):
doc2_highlighted = extract_highlighted_text(Document(file2)) doc2_highlighted = extract_highlighted_text(Document(file2))
# Compare the sets of highlighted text to check if they are the same # Compare the sets of highlighted text to check if they are the same
return set(doc1_highlighted) == set(doc2_highlighted) return set(doc1_highlighted) == set(doc2_highlighted)

View File

@@ -9,8 +9,8 @@
"parameters": { "parameters": {
"files": [ "files": [
{ {
"url": "https://drive.usercontent.google.com/download?id=10hgB73d_DoQXQVgUjvgXFUCP1Hd9YxDb&export=download&authuser=0&confirm=t&uuid=845f9616-2fb7-476a-abab-8b620d482ac2&at=APZUnTXB71PxHF7Dq9TC2OL_cRLm:1706199789147", "url": "https://drive.google.com/uc?id=1Ul4mtQ4SpUNLVDlaiJThPprBe6WTGZ2L&export=download",
"path": "Desktop/sample-recruitment-phone-script.docx" "path": "Desktop/sample-recruitment-phone-script.odt"
} }
] ]
} }
@@ -18,7 +18,7 @@
{ {
"type": "open", "type": "open",
"parameters": { "parameters": {
"path": "Desktop/sample-recruitment-phone-script.docx" "path": "Desktop/sample-recruitment-phone-script.odt"
} }
} }
], ],
@@ -31,7 +31,7 @@
{ {
"type": "activate_window", "type": "activate_window",
"parameters": { "parameters": {
"window_name": "sample-recruitment-phone-script.docx - LibreOffice Writer", "window_name": "sample-recruitment-phone-script.odt - LibreOffice Writer",
"strict": true "strict": true
} }
}, },
@@ -55,13 +55,13 @@
"func": "check_highlighted_words", "func": "check_highlighted_words",
"expected": { "expected": {
"type": "cloud_file", "type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1s9Dsy66-zxbCAgeTyCh0P7AT7P4jF6o3&export=download&authuser=0&confirm=t&uuid=1239f2a1-8c86-45a4-8e7d-36388ac22a69&at=APZUnTVZQzXQAMNsKKQzOw5ppT8A:1706017721589", "path": "https://drive.google.com/uc?id=12iMkgCYuUyhKUXux96kANLIeud0Wz9ct&export=download",
"dest": "sample-recruitment-phone-script_Gold.docx" "dest": "sample-recruitment-phone-script_Gold.odt"
}, },
"result": { "result": {
"type": "vm_file", "type": "vm_file",
"path": "Desktop/sample-recruitment-phone-script.docx", "path": "Desktop/sample-recruitment-phone-script.odt",
"dest": "sample-recruitment-phone-script.docx" "dest": "sample-recruitment-phone-script.odt"
} }
} }
} }