diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index a38524d..e588e74 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -11,9 +11,9 @@ from docx.shared import RGBColor from odf.opendocument import load from odf.text import P from odf.text import Span +from rapidfuzz import fuzz from skimage.color import deltaE_ciede2000 from skimage.color import rgb2lab -from rapidfuzz import fuzz logger = logging.getLogger("desktopenv.metric.docs") @@ -173,9 +173,11 @@ def compare_docx_tables(docx_file1, docx_file2): return 1 + from io import BytesIO from PIL import Image + def compare_docx_images(docx_file1, docx_file2): doc1 = Document(docx_file1) doc2 = Document(docx_file2) @@ -187,7 +189,7 @@ def compare_docx_images(docx_file1, docx_file2): img_data = rel.target_part.blob images.append(BytesIO(img_data)) return images - + images1 = extract_images(doc1) images2 = extract_images(doc2) if len(images1) != len(images2): @@ -197,8 +199,10 @@ def compare_docx_images(docx_file1, docx_file2): return 0 return 1 + import pytesseract + def compare_image_text(image_path, rule): img = Image.open(image_path) img_text = pytesseract.image_to_string(img) @@ -207,6 +211,7 @@ def compare_image_text(image_path, rule): else: raise ValueError("Unsupported rule type") + def compare_line_spacing(docx_file1, docx_file2): if not compare_docx_files(docx_file1, docx_file2): return 0 @@ -313,7 +318,7 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float: section = doc2.sections[0] paragraph_width = section.page_width - section.left_margin - section.right_margin ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or ( - x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0) + x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0) minus = .0 for p1, p2 in zip(para1, para2): # filter CLEAR tabstop and default left-0 tabstop @@ -583,3 +588,95 @@ def compare_highlighted_text(file1, file2): 
return 1 else: return 0 + + +def compare_references(file1, file2, **options): + reference_indicator = options.get('reference_indicator', 'References') + reference_base_result = options.get('reference_base_result', 0.5) + + # Determine file types and load documents + if file1.endswith('.docx') and file2.endswith('.docx'): + doc1 = Document(file1) + doc2 = Document(file2) + doc1_paragraphs = [p.text for p in doc1.paragraphs] + doc2_paragraphs = [p.text for p in doc2.paragraphs] + else: + # Unsupported file types or mismatch + print("Unsupported file types or mismatch between file types.") + return 0 + + # Find the references section in the paragraphs, find the idx of the last reference_indicator in the paragraph list + ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1 + ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1 + + if ref1_idx == -1 and ref2_idx == -1: + return 1 + + if ref1_idx == -1 or ref2_idx == -1: + return 0 + + # split the reference section into reference items, and remove the empty string items + ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()] + ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()] + + # Compare the references + + if len(ref1) != len(ref2): + return 0 + + total_similarity = 0 + for r1, r2 in zip(ref1, ref2): + # fuzzy match the references + similarity = fuzz.ratio(r1, r2) / 100.0 + total_similarity += similarity + + result = total_similarity / len(ref1) + if result >= reference_base_result: + return (result - reference_base_result) / (1 - reference_base_result) + else: + return 0 + + +def compare_answer(file1, file2, **options): + """This is a specific function to compare the """ + # Determine file types and load documents + if file1.endswith('.docx') and file2.endswith('.docx'): + doc1 = Document(file1) + doc2 = Document(file2) + doc1_paragraphs = [p.text for p in doc1.paragraphs] + doc2_paragraphs = 
[p.text for p in doc2.paragraphs] + else: + # Unsupported file types or mismatch + print("Unsupported file types or mismatch between file types.") + return 0 + + # Find the references section in the paragraphs, find the idx of the last reference_indicator in the paragraph list + ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1 + ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1 + + if ref1_idx == -1 and ref2_idx == -1: + return 1 + + if ref1_idx == -1 or ref2_idx == -1: + return 0 + + # split the reference section into reference items, and remove the empty string items + ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()] + ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()] + + # Compare the references + + if len(ref1) != len(ref2): + return 0 + + total_similarity = 0 + for r1, r2 in zip(ref1, ref2): + # fuzzy match the references + similarity = fuzz.ratio(r1, r2) / 100.0 + total_similarity += similarity + + result = total_similarity / len(ref1) + if result >= reference_base_result: + return (result - reference_base_result) / (1 - reference_base_result) + else: + return 0 diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 4a58e50..f6800f0 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -16,9 +16,9 @@ from openpyxl.utils import get_column_letter from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.worksheet.worksheet import Worksheet -from .utils import _match_value_to_rule, _read_cell_style, read_cell_value -from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\ - , load_filters, load_pivot_tables +from desktop_env.evaluators.metrics.utils import _match_value_to_rule, _read_cell_style, read_cell_value +from desktop_env.evaluators.metrics.utils import load_charts, load_sparklines, 
load_rows_or_cols, load_xlsx_styles \ + , load_filters, load_pivot_tables from rapidfuzz import fuzz # from openpyxl.utils import coordinate_to_tuple diff --git a/evaluation_examples/examples/multi_apps/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json b/evaluation_examples/examples/multi_apps/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json new file mode 100644 index 0000000..0ae39a6 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json @@ -0,0 +1,116 @@ +{ + "id": "1f18aa87-af6f-41ef-9853-cdb8f32ebdea", + "snapshot": "libreoffice_calc", + "instruction": "I've prepared some grammar tests and placed them in the 'Grammar test' folder. I've already provided the multiple-choice answers for Test 1 in the 'answer doc' file. Could you please follow the same format to write out the answers for the remaining two tests in the doc file? This way, I can distribute them to the students as a reference. Thank you.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/students work/", + "/home/user/Desktop/Lec powerpoint/", + "/home/user/Desktop/Grammar test/", + "/home/user/Desktop/Grammar rules PDF/", + "/home/user/Desktop/FDI/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/Grammer test 1.docx", + "url": "https://drive.google.com/uc?id=1VaXQ9XdzMv079xKFs0Y2XrwdmwFHIvBK&export=download" + }, + { + "path": "/home/user/Desktop/Grammer test 2.docx", + "url": "https://drive.google.com/uc?id=1k2T88WreTwi-Yyp9mEJnreEQC3DdkJ2x&export=download" + }, + { + "path": "/home/user/Desktop/Grammer test 3.docx", + "url": "https://drive.google.com/uc?id=1QgyQWVOcAJuPaSlrywb9nuFiQDySsTb2&export=download" + }, + { + "path": "/home/user/Desktop/Answer.docx", + "url": "https://drive.google.com/uc?id=1BC2DuWJuZggmf6fXl6Ys9xQMZzU6a1br&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf", 
+ "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "/home/user/Desktop/Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "/home/user/Desktop/Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/1f18aa87-af6f-41ef-9853-cdb8f32ebdea", + "related_apps": [ + "os", + "libreoffice_writer" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Answer.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + ] + } + } + ], + "func": "compare_docx_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1TOMGWC3OFuP6yEGQuRJMEFWdg2NcBPSs&export=download", + "dest": "Answer gold.docx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/Answer.docx", + "dest": "Answer.docx" + }, + "options": { + "ignore_case": true, + "ignore_blanks": true + } + } +} diff --git a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json 
b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json index 239d695..808bbc0 100644 --- a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json +++ b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json @@ -1,7 +1,7 @@ { "id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", "snapshot": "libreoffice_calc", - "instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. If it turns out that the current formatting does not align with APA 7 standards, I would greatly appreciate your assistance in making the necessary adjustments to comply with those guidelines. ", + "instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. 
Please make the necessary adjustments if the current formatting does not align with APA 7 standards or contains errors.", "source": "authors", "config": [ { @@ -90,13 +90,45 @@ "related_apps": [ ], "evaluator": { - "postconfig": [], - "func": "", - "result": { - }, + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "case study.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + ] + } + } + ], + "func": "compare_references", "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1325Qfch0JaJ_wJ20ICxMoHeW8KLpK8v0&export=download", + "dest": "case study gold.docx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/students work/case study.docx", + "dest": "case study.docx" }, "options": { + "content_only": true, + "reference_base_result": 0.92 } } } diff --git a/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json index e1f0544..dd0c8f6 100644 --- a/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json +++ b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json @@ -1,26 +1,169 @@ { - "id": "3a93cae4-ad3e-403e-8c12-65303b271818", - "snapshot": "libreoffice_calc", - "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. 
Thanks!", - "source": "authors", - "config": [ - - ], - "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", - "related_apps": [ - - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - + "id": "3a93cae4-ad3e-403e-8c12-65303b271818", + "snapshot": "libreoffice_calc", + "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/students work/", + "/home/user/Desktop/Lec powerpoint/", + "/home/user/Desktop/Grammar test/", + "/home/user/Desktop/Grammar rules PDF/", + "/home/user/Desktop/FDI/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/students work/Zheng He .docx", + "url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download" + }, + { + "path": "/home/user/Desktop/students work/cassie.docx", + "url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download" + }, + { + "path": "/home/user/Desktop/students work/case study.docx", + "url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": 
"/home/user/Desktop/Grammar rules PDF/csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "/home/user/Desktop/Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "/home/user/Desktop/Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1DSjRYgofPK2jldKwIsAygz2x8XWlXCK6&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", + "related_apps": [ + "os", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Course Timetable.xlsx - LibreOffice Calc", + "strict": true } - } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + ] + } + } + ], + "func": [ + "compare_table", + "compare_table", + "compare_table" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1VMOon8byWuoCW2Uk5etGMJLMzAfwFVyB&export=download", + "dest": "Course Timetable gold.xlsx" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1jAThiIqILZ5t-RFPHVniSvAL8ZJO1H3P&export=download", + "dest": "Course Timetable gold 2.xlsx" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1U0THDtPCgsw-Rb0N9fjF8DeOepPeUajP&export=download", + "dest": "Course Timetable gold 3.xlsx" + } + ], 
+ "options": [ + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + }, + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + }, + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + } + ], + "conj": "or" + } } diff --git a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json deleted file mode 100644 index 28542f8..0000000 --- a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", - "snapshot": "libreoffice_calc", - "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. 
Thanks a bunch!", - "source": "authors", - "config": [ - { - } - ], - "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", - "related_apps": [ - "thunderbird", - "libreoffice_calc" - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - }, - "expected": { - }, - "options": { - } - } -} diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json index deda04e..15f1d0e 100644 --- a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -52,6 +52,29 @@ "image" ], "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "rm", + "-rf", + "/home/user/Desktop/presenter" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "unzip", + "/home/user/Desktop/presenter.zip", + "-d", + "/home/user/Desktop" + ] + } + } + ], "func": "compare_image_list", "result": { "type": "vm_file",