Fix bugs in multiple examples
This commit is contained in:
@@ -50,7 +50,11 @@ def contains_page_break(docx_file):
|
||||
if not docx_file:
|
||||
return 0
|
||||
|
||||
doc = Document(docx_file)
|
||||
try:
|
||||
doc = Document(docx_file)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
namespaces = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
|
||||
|
||||
@@ -91,16 +95,24 @@ def compare_docx_files(file1, file2, **options):
|
||||
|
||||
# Determine file types and load documents
|
||||
if file1.endswith('.docx') and file2.endswith('.docx'):
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
try:
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
doc1_paragraphs = [p.text for p in doc1.paragraphs]
|
||||
doc2_paragraphs = [p.text for p in doc2.paragraphs]
|
||||
if ignore_order:
|
||||
doc1_paragraphs = sorted(doc1_paragraphs)
|
||||
doc2_paragraphs = sorted(doc2_paragraphs)
|
||||
elif file1.endswith('.odt') and file2.endswith('.odt'):
|
||||
doc1 = load(file1)
|
||||
doc2 = load(file2)
|
||||
try:
|
||||
doc1 = load(file1)
|
||||
doc2 = load(file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
doc1_paragraphs = get_paragraph_texts_odt(doc1)
|
||||
doc2_paragraphs = get_paragraph_texts_odt(doc2)
|
||||
if ignore_order:
|
||||
@@ -153,8 +165,12 @@ def compare_init_lines(file1, file2):
|
||||
if not file1 or not file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
try:
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
doc1_paragraphs = [p.text for p in doc1.paragraphs]
|
||||
doc2_paragraphs = [p.text for p in doc2.paragraphs]
|
||||
@@ -173,8 +189,12 @@ def compare_docx_tables(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
try:
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
# get list of tables in docx
|
||||
tables1 = doc1.tables
|
||||
@@ -202,8 +222,12 @@ def compare_docx_images(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
try:
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
def extract_images(doc):
|
||||
images = []
|
||||
@@ -240,8 +264,13 @@ def compare_line_spacing(docx_file1, docx_file2):
|
||||
|
||||
if not compare_docx_files(docx_file1, docx_file2):
|
||||
return 0
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
|
||||
try:
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
if len(doc1.paragraphs) != len(doc2.paragraphs):
|
||||
return 0
|
||||
@@ -265,8 +294,12 @@ def compare_insert_equation(docx_file1, docx_file2):
|
||||
if not compare_docx_files(docx_file1, docx_file2):
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
try:
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
# Compare each paragraph if it contains equation
|
||||
for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
|
||||
@@ -280,7 +313,12 @@ def compare_font_names(docx_file, rules: List[Dict[str, Any]]):
|
||||
if not docx_file:
|
||||
return 0
|
||||
|
||||
doc = Document(docx_file)
|
||||
try:
|
||||
doc = Document(docx_file)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
expected_font = rules["font_name"]
|
||||
|
||||
for paragraph in doc.paragraphs:
|
||||
@@ -295,8 +333,12 @@ def compare_subscript_contains(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
try:
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
|
||||
for run1, run2 in zip(para1.runs, para2.runs):
|
||||
@@ -310,7 +352,11 @@ def has_page_numbers_in_footers(docx_file):
|
||||
if not docx_file:
|
||||
return 0
|
||||
|
||||
doc = Document(docx_file)
|
||||
try:
|
||||
doc = Document(docx_file)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
for section in doc.sections:
|
||||
footer = section.footer
|
||||
@@ -327,7 +373,12 @@ def is_first_line_centered(docx_file):
|
||||
if not docx_file:
|
||||
return 0
|
||||
|
||||
doc = Document(docx_file)
|
||||
try:
|
||||
doc = Document(docx_file)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
first_paragraph = doc.paragraphs[0]
|
||||
|
||||
# check if the first line is center justified
|
||||
@@ -345,8 +396,13 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
|
||||
if not docx_file1 or not docx_file2:
|
||||
return .0
|
||||
|
||||
doc1: Document = Document(docx_file1)
|
||||
doc2: Document = Document(docx_file2)
|
||||
try:
|
||||
doc1: Document = Document(docx_file1)
|
||||
doc2: Document = Document(docx_file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return .0
|
||||
|
||||
para1 = [p for p in doc1.paragraphs if p.text.strip()]
|
||||
para2 = [p for p in doc2.paragraphs if p.text.strip()]
|
||||
if len(para1) != len(para2): return .0
|
||||
@@ -383,8 +439,12 @@ def compare_contains_image(docx_file1, docx_file2):
|
||||
if not docx_file1 or not docx_file2:
|
||||
return 0
|
||||
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
try:
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
for para1, para2 in zip(doc1.paragraphs, doc2.paragraphs):
|
||||
for run1, run2 in zip(para1.runs, para2.runs):
|
||||
@@ -400,7 +460,13 @@ def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
|
||||
|
||||
if not compare_docx_files(file_path1, file_path2):
|
||||
return 0
|
||||
document = Document(file_path1)
|
||||
|
||||
try:
|
||||
document = Document(file_path1)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
threshold = kwargs.get('threshold', 3.5)
|
||||
|
||||
def _calculate_color_difference(rgb1, rgb2):
|
||||
@@ -462,7 +528,12 @@ def evaluate_strike_through_last_paragraph(file_path1, file_path2):
|
||||
|
||||
if not compare_docx_files(file_path1, file_path2):
|
||||
return 0
|
||||
document = Document(file_path1)
|
||||
|
||||
try:
|
||||
document = Document(file_path1)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
# Get the last paragraph
|
||||
last_paragraph = document.paragraphs[-1]
|
||||
@@ -479,7 +550,11 @@ def evaluate_conversion(file_path):
|
||||
if not file_path:
|
||||
return 0
|
||||
|
||||
document = Document(file_path)
|
||||
try:
|
||||
document = Document(file_path)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
for table in document.tables:
|
||||
for row in table.rows:
|
||||
@@ -501,7 +576,11 @@ def evaluate_spacing(file_path):
|
||||
if not file_path:
|
||||
return 0
|
||||
|
||||
document = Document(file_path)
|
||||
try:
|
||||
document = Document(file_path)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
# Check line spacing for introduction, body, and conclusion
|
||||
introduction_spacing = document.paragraphs[0].paragraph_format.line_spacing
|
||||
@@ -519,7 +598,13 @@ def check_italic_font_size_14(path1, path2):
|
||||
|
||||
if not compare_docx_files(path1, path2):
|
||||
return 0
|
||||
document = Document(path1)
|
||||
|
||||
try:
|
||||
document = Document(path1)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
for paragraph in document.paragraphs:
|
||||
for run in paragraph.runs:
|
||||
if run.italic:
|
||||
@@ -534,7 +619,11 @@ def evaluate_alignment(docx_path):
|
||||
return 0
|
||||
|
||||
# Load the document
|
||||
doc = Document(docx_path)
|
||||
try:
|
||||
doc = Document(docx_path)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
# Iterate through each paragraph in the document
|
||||
for para in doc.paragraphs:
|
||||
@@ -565,7 +654,12 @@ def get_unique_train_ids(initial_file): # fixed standard
|
||||
if not initial_file:
|
||||
return set(), 0
|
||||
|
||||
doc = Document(initial_file)
|
||||
try:
|
||||
doc = Document(initial_file)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return set(), 0
|
||||
|
||||
train_ids = set()
|
||||
processed_lines = 0
|
||||
|
||||
@@ -586,7 +680,13 @@ def check_no_duplicates(initial_file, processed_file):
|
||||
|
||||
# Open the document
|
||||
train_ids_ini, ini_lines = get_unique_train_ids(initial_file)
|
||||
doc_processed = Document(processed_file)
|
||||
|
||||
try:
|
||||
doc_processed = Document(processed_file)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
train_ids_pro = set()
|
||||
processed_lines = 0 # Counter for valid lines processed
|
||||
|
||||
@@ -615,10 +715,14 @@ def compare_docx_lines(file1, file2):
|
||||
return 0
|
||||
|
||||
# Read the text of the document, line by line
|
||||
doc1 = Document(file1)
|
||||
doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
|
||||
try:
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
doc2 = Document(file2)
|
||||
doc1_lines = [p.text.strip() for p in doc1.paragraphs if p.text.strip()]
|
||||
doc2_lines = [p.text.strip() for p in doc2.paragraphs if p.text.strip()]
|
||||
# print(doc1_lines)
|
||||
# print(doc2_lines)
|
||||
@@ -638,8 +742,13 @@ def compare_docx_files_and_ignore_new_lines(file1, file2, **options):
|
||||
|
||||
# Determine file types and load documents
|
||||
if file1.endswith('.docx') and file2.endswith('.docx'):
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
try:
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
# First, delete all the blank in paragraphs
|
||||
doc1 = [p for p in doc1.paragraphs if p.text != '']
|
||||
doc2 = [p for p in doc2.paragraphs if p.text != '']
|
||||
@@ -716,8 +825,13 @@ def compare_references(file1, file2, **options):
|
||||
|
||||
# Determine file types and load documents
|
||||
if file1.endswith('.docx') and file2.endswith('.docx'):
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
try:
|
||||
doc1 = Document(file1)
|
||||
doc2 = Document(file2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error: {e}")
|
||||
return 0
|
||||
|
||||
doc1_paragraphs = [p.text for p in doc1.paragraphs]
|
||||
doc2_paragraphs = [p.text for p in doc2.paragraphs]
|
||||
else:
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import csv
|
||||
import os
|
||||
import datetime
|
||||
import difflib
|
||||
import functools
|
||||
import json
|
||||
import logging
|
||||
import operator
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
from numbers import Number
|
||||
@@ -13,7 +13,6 @@ from typing import Callable, Any, Union
|
||||
from typing import Dict, List, Pattern
|
||||
|
||||
import lxml.etree
|
||||
import pandas as pd
|
||||
import pdfplumber
|
||||
import yaml
|
||||
from docx import Document
|
||||
@@ -104,13 +103,14 @@ def fuzzy_place_math(result_file_path, rules) -> float:
|
||||
for word in words_list:
|
||||
max_score = 0
|
||||
for ans in expect:
|
||||
score = fuzz.ratio(word, ans)/100
|
||||
score = fuzz.ratio(word, ans) / 100
|
||||
max_score = max(max_score, score)
|
||||
fuzzy_score_list.append(max_score)
|
||||
if len(fuzzy_score_list) != 3:
|
||||
return 0.
|
||||
return sum(fuzzy_score_list) / 3
|
||||
|
||||
|
||||
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
|
||||
"""
|
||||
Args:
|
||||
@@ -341,27 +341,30 @@ def check_direct_json_object(result, rules) -> float:
|
||||
logger.debug("check_direct_json_object: result is not a valid json object")
|
||||
return 0.
|
||||
|
||||
|
||||
def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
    """Check that the first speedtest record was taken close to the current time.

    The file is read as CSV. The first line is treated as a header and the
    second line as the first record; the timestamp is taken from that
    record's second column, and only its trailing ``HH:MM`` part is used.

    Args:
        speedtest_result_path: Path of the speedtest results CSV file. A falsy
            value (``None`` or ``""``) yields 0 immediately.
        time_diff: Maximum allowed distance in minutes (anything ``int()``
            accepts). The distance must be strictly smaller than this value.

    Returns:
        1 if the recorded time is less than ``time_diff`` minutes away from
        the current local time, otherwise 0. Any failure to read or parse the
        file (missing file, fewer than two lines, fewer than two columns,
        malformed time, invalid ``time_diff``) also yields 0.
    """
    if not speedtest_result_path:
        return 0

    minutes_per_day = 24 * 60

    try:
        # csv.reader handles quoted fields and strips the line terminator,
        # which a plain ``line.split(',')`` does not.
        with open(speedtest_result_path, 'r', newline='') as f:
            rows = csv.reader(f)
            next(rows)  # skip the header line
            first_record = next(rows)

        # NOTE(review): assumes the timestamp lives in the second column and
        # ends with "HH:MM" -- confirm against the speedtest export format.
        recorded = datetime.datetime.strptime(first_record[1][-5:], '%H:%M')
        recorded_minutes = recorded.hour * 60 + recorded.minute

        now = datetime.datetime.now()
        now_minutes = now.hour * 60 + now.minute

        # Only the time of day is compared, so measure the distance on a
        # 24-hour clock: 23:59 and 00:01 are 2 minutes apart, not 1438.
        distance = abs(recorded_minutes - now_minutes)
        distance = min(distance, minutes_per_day - distance)

        return 1 if distance < int(time_diff) else 0
    except Exception:
        # Best-effort metric: any read/parse problem counts as "no match".
        # ``logger`` is the module-level logger used throughout this file.
        logger.debug("compare_time_in_speedtest_results: file not found or not readable")
        return 0
|
||||
|
||||
|
||||
def is_included_all_json_objects(gold_file_path, result_file_path):
|
||||
|
||||
Reference in New Issue
Block a user