32 lines
801 B
Python
32 lines
801 B
Python
import operator
|
|
from typing import Any
|
|
from typing import Dict
|
|
|
|
import fitz # PyMuPDF
|
|
from pypdf import PdfReader
|
|
|
|
|
|
def check_pdf_pages(pdf_file: str, rules: Dict[str, Any]) -> float:
|
|
if pdf_file is None:
|
|
return 0.0
|
|
reader = PdfReader(pdf_file)
|
|
nb_pages: int = len(reader.pages)
|
|
return float(getattr(operator, rules["relation"])(nb_pages, rules["ref_value"]))
|
|
|
|
|
|
def extract_answers_from_pdf(pdf_file):
|
|
doc = fitz.open(pdf_file)
|
|
answers = []
|
|
|
|
for page in doc:
|
|
text = page.get_text()
|
|
lines = text.split('\n')
|
|
for line in lines:
|
|
if line.strip():
|
|
parts = line.split('=')
|
|
if len(parts) > 1:
|
|
answer = parts[-1].strip()
|
|
answers.append(answer)
|
|
|
|
return answers
|