This commit is contained in:
tsuky_chen
2024-02-02 03:31:19 +08:00
8 changed files with 68 additions and 15 deletions

View File

@@ -43,6 +43,7 @@ from .docs import (
compare_highlighted_text,
is_first_line_centered,
check_file_exists,
check_tabstops,
compare_contains_image
)
from .general import (

View File

@@ -6,11 +6,13 @@ import zipfile
from typing import List, Dict, Any
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
from docx.shared import RGBColor
from odf.opendocument import load
from odf.text import P
from odf.text import Span
from skimage.color import deltaE_ciede2000
from skimage.color import rgb2lab
logger = logging.getLogger("desktopenv.metric.docs")
@@ -141,7 +143,7 @@ def compare_docx_tables(docx_file1, docx_file2):
# Compare each cell
for i in range(len(table1.rows)):
for j in range(len(table1.columns)):
if table1.cell(i, j).text != table2.cell(i, j).text:
if table1.cell(i, j).text.strip() != table2.cell(i, j).text.strip():
return 0
return 1
@@ -234,6 +236,40 @@ def check_file_exists(directory, filename):
return 1 if os.path.isfile(file_path) else 0
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
doc1: Document = Document(docx_file1)
doc2: Document = Document(docx_file2)
para1 = [p for p in doc1.paragraphs if p.text.strip()]
para2 = [p for p in doc2.paragraphs if p.text.strip()]
if len(para1) != len(para2): return .0
if kwargs.get('word_number_split_by_tabstop', None) is not None:
number = kwargs['word_number_split_by_tabstop']
index = kwargs.get('index', 0)
for p1 in para1:
splits = p1.text.split('\t')
if len(splits) == 0: return .0
words = list(filter(lambda x: x.strip(), re.split(r'\s', splits[index])))
if len(words) != number: return .0
section = doc2.sections[0]
paragraph_width = section.page_width - section.left_margin - section.right_margin
ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
minus = .0
for p1, p2 in zip(para1, para2):
# filter CLEAR tabstop and default left-0 tabstop
tabs1 = [tst for tst in p1.paragraph_format.tab_stops if not ignore_tabs(tst)]
tabs2 = [tst for tst in p2.paragraph_format.tab_stops if not ignore_tabs(tst)]
if len(tabs1) != len(tabs2): return .0
difference = .0
for t1, t2 in zip(tabs1, tabs2):
if t1.alignment != t2.alignment: return .0
difference += abs(t1.position - t2.position)
minus += difference / paragraph_width
score = 1 - (minus / len(para1))
return score
def compare_contains_image(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -258,10 +294,18 @@ def compare_contains_image(docx_file1, docx_file2):
# print(find_default_font("Ani", config_path))
def evaluate_colored_words_in_tables(file_path1, file_path2):
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
threshold = kwargs.get('threshold', 3.5)
def _calculate_color_difference(rgb1, rgb2):
srgb1 = [rgb1[0] / 255.0, rgb1[1] / 255.0, rgb1[2] / 255.0]
srgb2 = [rgb2[0] / 255.0, rgb2[1] / 255.0, rgb2[2] / 255.0]
lab1, lab2 = rgb2lab(srgb1), rgb2lab(srgb2)
delta_e = deltaE_ciede2000(lab1, lab2)
return delta_e
for table in document.tables:
# Iterate through rows and cells in the table
@@ -273,9 +317,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
if word:
first_letter = word[0].lower()
if first_letter in 'aeiou' and run.font.color.rgb != RGBColor(255, 0, 0):
if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(255, 0, 0)) > threshold:
return 0 # Vowel-colored words should be red
elif first_letter not in 'aeiou' and run.font.color.rgb != RGBColor(0, 0, 255):
elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(0, 0, 255)) > threshold:
return 0 # Non-vowel-colored words should be blue
return 1 # All words in tables are correctly colored

View File

@@ -55,7 +55,7 @@
"func": "compare_line_spacing",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1-svVsH-l2ofufEKuN-cYrIrvXNobtATE&export=download&authuser=0&confirm=t&uuid=be7f891a-f858-48f5-a72d-4e42bbfb8b65&at=APZUnTXzBnaeSJjmxeh4zG03pzA0:1704179807785",
"path": "https://drive.usercontent.google.com/download?id=1-svVsH-l2ofufEKuN-cYrIrvXNobtATE&export=download&authuser=0&confirm=t&uuid=95ca5e2e-7fb3-4084-9f7b-a608a8277322&at=APZUnTXFO_571vyDp_r_LskPfq-j:1706796981024",
"dest": "Novels_Intro_Packet_Gold.docx"
},
"result": {

View File

@@ -1,7 +1,7 @@
{
"id": "0a0faba3-5580-44df-965d-f562a99b291c",
"snapshot": "libreoffice_writer",
"instruction": "I would like to make the first three words of the sentence left-aligned and the rest right-aligned. I basically want to have some empty space in the middle to add some photos. Assume that every sentence will have at least three words. Could you help me on alignment for me?",
"instruction": "I would like to make the first three words of the sentence left-aligned and the rest right-aligned. I basically want to have some empty space in the middle to add some photos. Assume that every sentence will have at least three words. Could you help me on alignment for me using tabstops?",
"source": "https://stackoverflow.com/questions/64528055/how-to-make-part-of-my-sentence-left-aligned-and-rest-as-right-aligned",
"config": [
{
@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.google.com/uc?id=1Wrjxsf184Go70TcRGM4Tohczh29Q9B_U&export=download",
"url": "https://drive.usercontent.google.com/download?id=1Wrjxsf184Go70TcRGM4Tohczh29Q9B_U&export=download&authuser=0&confirm=t&uuid=811f572f-03ee-47b9-8fd5-4978920ff425&at=APZUnTXcRTZAOb33QlpZ7-FT8I8Q:1706799959703",
"path": "Desktop/04 CHIN9505 EBook Purchasing info 2021 Jan.docx"
}
]
@@ -52,16 +52,20 @@
}
}
],
"func": "compare_init_lines",
"func": "check_tabstops",
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1yyHGj8KUHDMsZmc1QeJ1KkvSEGy83jMR&export=download",
"path": "https://drive.usercontent.google.com/download?id=1yyHGj8KUHDMsZmc1QeJ1KkvSEGy83jMR&export=download&authuser=0&confirm=t&uuid=32f8aa47-c590-4ece-bf65-65a0d683fcfa&at=APZUnTU1_BaeVgyB8GLJWfJrIAYh:1706802911129",
"dest": "04 CHIN9505 EBook Purchasing info 2021 Jan_Gold.docx"
},
"result": {
"type": "vm_file",
"path": "Desktop/04 CHIN9505 EBook Purchasing info 2021 Jan.docx",
"dest": "04 CHIN9505 EBook Purchasing info 2021 Jan.docx"
},
"options": {
"word_number_split_by_tabstop": 3,
"index": 0
}
}
}

View File

@@ -55,7 +55,7 @@
"func": "evaluate_colored_words_in_tables",
"expected": {
"type": "cloud_file",
"path": "https://drive.google.com/uc?id=1ksn444K17lFOdm5pELrQYvuZHkOsKq69&export=download",
"path": "https://drive.usercontent.google.com/download?id=1XmF-6ttL23xMK-j4P50qVGO4vgb6EgZR&export=download&authuser=0&confirm=t&uuid=fe5c16a5-3131-4a19-a6bf-c5e7faf341dd&at=APZUnTWDtqYGJvChovcgUVHDnvzy:1706807220392",
"dest": "Dolch_Sight_Words_Primer_Gold.docx"
},
"result": {

View File

@@ -1,7 +1,7 @@
{
"id": "adf5e2c3-64c7-4644-b7b6-d2f0167927e7",
"snapshot": "libreoffice_writer",
"instruction": "Help me adding \"Steinberg, F. M., Bearden, M. M., & Keen, C. L. (2003). Cocoa and chocolate flavonoids: Implications for cardiovascular health. Journal of the American Dietetic Association, 103(2), 215-223. doi: 10.1053/jada.2003.50028\" to my reference list, and add a cross reference in the fourth paragraph where I marked \"<add here>\".",
"instruction": "Help me adding \"Steinberg, F. M., Bearden, M. M., & Keen, C. L. (2003). Cocoa and chocolate flavonoids: Implications for cardiovascular health. Journal of the American Dietetic Association, 103(2), 215-223. doi: 10.1053/jada.2003.50028\" to my reference list, and add a cross reference (using reference number) in the fourth paragraph where I marked \"<add here>\".",
"source": "https://seekstar.github.io/2022/04/11/libreoffice%E5%BC%95%E7%94%A8%E6%96%87%E7%8C%AE/",
"config": [
{

View File

@@ -481,11 +481,13 @@ class GPT4v_Agent:
"messages": messages,
"max_tokens": self.max_tokens
})
except:
except Exception as e:
logger.warning("LLM INVOCATION ERROR: %s", str(e))
response = ""
logger.debug("RESPONSE: %s", response)
# {{{
if self.exp == "seeact":
messages.append({
"role": "assistant",
@@ -521,7 +523,7 @@ class GPT4v_Agent:
except Exception as e:
print("Failed to parse action from response", e)
actions = None
self.thoughts.append("")
self.thoughts.append("") # }}}
return actions

View File

@@ -39,4 +39,6 @@ fastdtw
odfpy
openai
func-timeout
beautifulsoup4
beautifulsoup4
dashscope
google-generativeai