From fd9f6cbc59063d518847161c14e8d3a1890c574d Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 7 Mar 2024 18:00:51 +0800 Subject: [PATCH 1/8] Update requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 497b9f8..fdcb95d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -44,3 +44,5 @@ dashscope google-generativeai PyYaml mutagen +pytesseract +borb From 1aa2a439087ead0dc5d43775b1e0004b179381c3 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 7 Mar 2024 22:15:08 +0800 Subject: [PATCH 2/8] Update multi-apps examples --- desktop_env/evaluators/metrics/docs.py | 103 +++++++++- desktop_env/evaluators/metrics/table.py | 6 +- .../1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json | 116 +++++++++++ .../2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json | 42 +++- .../3a93cae4-ad3e-403e-8c12-65303b271818.json | 189 +++++++++++++++--- .../767a3271-56db-4745-ac5d-846ef05e6fe5.json | 25 --- .../82e3c869-49f6-4305-a7ce-f3e64a0618e7.json | 23 +++ 7 files changed, 445 insertions(+), 59 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json delete mode 100644 evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index a38524d..e588e74 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -11,9 +11,9 @@ from docx.shared import RGBColor from odf.opendocument import load from odf.text import P from odf.text import Span +from rapidfuzz import fuzz from skimage.color import deltaE_ciede2000 from skimage.color import rgb2lab -from rapidfuzz import fuzz logger = logging.getLogger("desktopenv.metric.docs") @@ -173,9 +173,11 @@ def compare_docx_tables(docx_file1, docx_file2): return 1 + from io import BytesIO from PIL import Image + def compare_docx_images(docx_file1, docx_file2): doc1 = Document(docx_file1) doc2 = Document(docx_file2) @@ -187,7 +189,7 @@ def compare_docx_images(docx_file1, docx_file2): img_data = rel.target_part.blob images.append(BytesIO(img_data)) return images - + images1 = extract_images(doc1) images2 = extract_images(doc2) if len(images1) != len(images2): @@ -197,8 +199,10 @@ def compare_docx_images(docx_file1, docx_file2): return 0 return 1 + import pytesseract + def compare_image_text(image_path, rule): img = Image.open(image_path) img_text = pytesseract.image_to_string(img) @@ -207,6 +211,7 @@ def compare_image_text(image_path, rule): else: raise ValueError("Unsupported rule type") + def compare_line_spacing(docx_file1, docx_file2): if not compare_docx_files(docx_file1, docx_file2): return 0 @@ -313,7 +318,7 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float: section = doc2.sections[0] paragraph_width = section.page_width - section.left_margin - section.right_margin ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or ( - x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0) + x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0) minus = .0 for p1, p2 in zip(para1, para2): # filter CLEAR tabstop and default left-0 tabstop @@ -583,3 +588,95 @@ def compare_highlighted_text(file1, file2): return 1 else: return 0 + + +def compare_references(file1, file2, **options): + reference_indicator = options.get('reference_indicator', 'References') + reference_base_result = options.get('reference_base_result', 0.5) + + # Determine file types and load documents + if file1.endswith('.docx') and file2.endswith('.docx'): + doc1 = Document(file1) + doc2 = Document(file2) + doc1_paragraphs = [p.text for p in doc1.paragraphs] + doc2_paragraphs = [p.text for p in doc2.paragraphs] + else: + # Unsupported file types or mismatch + print("Unsupported file types or mismatch between file types.") + return 0 + + # Find the references section in the paragraphs, find the idx of the last reference_indicator in the paragraph list + ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1 + ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1 + + if ref1_idx == -1 and ref2_idx == -1: + return 1 + + if ref1_idx == -1 or ref2_idx == -1: + return 0 + + # split the reference section into reference items, and remove the empty string items + ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()] + ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()] + + # Compare the references + + if len(ref1) != len(ref2): + return 0 + + total_similarity = 0 + for r1, r2 in zip(ref1, ref2): + # fuzzy match the references + similarity = fuzz.ratio(r1, r2) / 100.0 + total_similarity += similarity + + result = total_similarity / len(ref1) + if result >= reference_base_result: + return (result - reference_base_result) / (1 - reference_base_result) + else: + return 0 + + +def compare_answer(file1, file2, **options): + """This is a specific function to compare the """ + # Determine file types and load documents + if file1.endswith('.docx') and file2.endswith('.docx'): + doc1 = Document(file1) + doc2 = Document(file2) + doc1_paragraphs = [p.text for p in doc1.paragraphs] + doc2_paragraphs = [p.text for p in doc2.paragraphs] + else: + # Unsupported file types or mismatch + print("Unsupported file types or mismatch between file types.") + return 0 + + # Find the references section in the paragraphs, find the idx of the last reference_indicator in the paragraph list + ref1_idx = doc1_paragraphs.index(reference_indicator) if reference_indicator in doc1_paragraphs else -1 + ref2_idx = doc2_paragraphs.index(reference_indicator) if reference_indicator in doc2_paragraphs else -1 + + if ref1_idx == -1 and ref2_idx == -1: + return 1 + + if ref1_idx == -1 or ref2_idx == -1: + return 0 + + # split the reference section into reference items, and remove the empty string items + ref1 = [p for p in doc1_paragraphs[ref1_idx + 1:] if p.strip()] + ref2 = [p for p in doc2_paragraphs[ref2_idx + 1:] if p.strip()] + + # Compare the references + + if len(ref1) != len(ref2): + return 0 + + total_similarity = 0 + for r1, r2 in zip(ref1, ref2): + # fuzzy match the references + similarity = fuzz.ratio(r1, r2) / 100.0 + total_similarity += similarity + + result = total_similarity / len(ref1) + if result >= reference_base_result: + return (result - reference_base_result) / (1 - reference_base_result) + else: + return 0 diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 4a58e50..f6800f0 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -16,9 +16,9 @@ from openpyxl.utils import get_column_letter from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.worksheet.worksheet import Worksheet -from .utils import _match_value_to_rule, _read_cell_style, read_cell_value -from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\ - , load_filters, load_pivot_tables +from desktop_env.evaluators.metrics.utils import _match_value_to_rule, _read_cell_style, read_cell_value +from desktop_env.evaluators.metrics.utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles \ + , load_filters, load_pivot_tables from rapidfuzz import fuzz # from openpyxl.utils import coordinate_to_tuple diff --git a/evaluation_examples/examples/multi_apps/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json b/evaluation_examples/examples/multi_apps/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json new file mode 100644 index 0000000..0ae39a6 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/1f18aa87-af6f-41ef-9853-cdb8f32ebdea.json @@ -0,0 +1,116 @@ +{ + "id": "1f18aa87-af6f-41ef-9853-cdb8f32ebdea", + "snapshot": "libreoffice_calc", + "instruction": "I've prepared some grammar tests and placed them in the 'Grammar test' folder. I've already provided the multiple-choice answers for Test 1 in the 'answer doc' file. Could you please follow the same format to write out the answers for the remaining two tests in the doc file? This way, I can distribute them to the students as a reference. Thank you.", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/students work/", + "/home/user/Desktop/Lec powerpoint/", + "/home/user/Desktop/Grammar test/", + "/home/user/Desktop/Grammar rules PDF/", + "/home/user/Desktop/FDI/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/Grammer test 1.docx", + "url": "https://drive.google.com/uc?id=1VaXQ9XdzMv079xKFs0Y2XrwdmwFHIvBK&export=download" + }, + { + "path": "/home/user/Desktop/Grammer test 2.docx", + "url": "https://drive.google.com/uc?id=1k2T88WreTwi-Yyp9mEJnreEQC3DdkJ2x&export=download" + }, + { + "path": "/home/user/Desktop/Grammer test 3.docx", + "url": "https://drive.google.com/uc?id=1QgyQWVOcAJuPaSlrywb9nuFiQDySsTb2&export=download" + }, + { + "path": "/home/user/Desktop/Answer.docx", + "url": "https://drive.google.com/uc?id=1BC2DuWJuZggmf6fXl6Ys9xQMZzU6a1br&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "/home/user/Desktop/Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "/home/user/Desktop/Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1NGtahknRq_kXsXlw0tRQ1_CZp9SljoVg&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/1f18aa87-af6f-41ef-9853-cdb8f32ebdea", + "related_apps": [ + "os", + "libreoffice_writer" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Answer.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + ] + } + } + ], + "func": "compare_docx_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1TOMGWC3OFuP6yEGQuRJMEFWdg2NcBPSs&export=download", + "dest": "Answer gold.docx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/Answer.docx", + "dest": "Answer.docx" + }, + "options": { + "ignore_case": true, + "ignore_blanks": true + } + } +} diff --git a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json index 239d695..808bbc0 100644 --- a/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json +++ b/evaluation_examples/examples/multi_apps/2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e.json @@ -1,7 +1,7 @@ { "id": "2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e", "snapshot": "libreoffice_calc", - "instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. If it turns out that the current formatting does not align with APA 7 standards, I would greatly appreciate your assistance in making the necessary adjustments to comply with those guidelines. ", + "instruction": "Could you please take a moment to review the 'case study' file located within the 'student work' folder? I'm particularly interested in ensuring that the references section at the end of the document adheres to the APA 7th edition formatting guidelines. Making the necessary adjustments if it turns out that the current formatting does not align with APA 7 standards or exists some errors.", "source": "authors", "config": [ { @@ -90,13 +90,45 @@ "related_apps": [ ], "evaluator": { - "postconfig": [], - "func": "", - "result": { - }, + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "case study.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + ] + } + } + ], + "func": "compare_references", "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1325Qfch0JaJ_wJ20ICxMoHeW8KLpK8v0&export=download", + "dest": "case study gold.docx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/students work/case study.docx", + "dest": "case study.docx" }, "options": { + "content_only": true, + "reference_base_result": 0.92 } } } diff --git a/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json index e1f0544..dd0c8f6 100644 --- a/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json +++ b/evaluation_examples/examples/multi_apps/3a93cae4-ad3e-403e-8c12-65303b271818.json @@ -1,26 +1,169 @@ { - "id": "3a93cae4-ad3e-403e-8c12-65303b271818", - "snapshot": "libreoffice_calc", - "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!", - "source": "authors", - "config": [ - - ], - "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", - "related_apps": [ - - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - - }, - "expected": { - - }, - "options": { - + "id": "3a93cae4-ad3e-403e-8c12-65303b271818", + "snapshot": "libreoffice_calc", + "instruction": "Could you please add a two-hour lecture slot to my weekly course timetable, scheduled for every Wednesday at 12 PM? It seems I accidentally omitted that when setting up my schedule. I'd appreciate you taking care of that for me. Thanks!", + "source": "authors", + "config": [ + { + "type": "command", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/Desktop/students work/", + "/home/user/Desktop/Lec powerpoint/", + "/home/user/Desktop/Grammar test/", + "/home/user/Desktop/Grammar rules PDF/", + "/home/user/Desktop/FDI/" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/students work/Zheng He .docx", + "url": "https://drive.google.com/uc?id=1wI4141LAthnY5m6qcCUaGgDooe4wiTgz&export=download" + }, + { + "path": "/home/user/Desktop/students work/cassie.docx", + "url": "https://drive.google.com/uc?id=1cW9TGJy56vossXxDsdnutPyCbR70af7M&export=download" + }, + { + "path": "/home/user/Desktop/students work/case study.docx", + "url": "https://drive.google.com/uc?id=11GzpoZvp4qnL2ukXdpbhH-a3zOIHhtDx&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules02.pdf", + "url": "https://drive.google.com/uc?id=1Eln9ehX6y6Df2-S_Hp7Ao1teKRu6I1Tg&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/irregularrules01.pdf", + "url": "https://drive.google.com/uc?id=1krdEEdNWvTwMKZU14QtI_xc2lCFVeVcl&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/fragrules.pdf", + "url": "https://drive.google.com/uc?id=1IXyI2KeiXsuh6XV2LelcmhZ2PDh_dBQf&export=download" + }, + { + "path": "/home/user/Desktop/Grammar rules PDF/csfsrules.pdf", + "url": "https://drive.google.com/uc?id=1ernwGGrjhYNoHVNAevdb2qNKQ0I5n3RP&export=download" + }, + { + "path": "/home/user/Desktop/Public Lecture Teaching Plan.docx", + "url": "https://drive.google.com/uc?id=1ywfVFTEbiSkypZpzLjLmq_ppSbQIC8s8&export=download" + }, + { + "path": "/home/user/Desktop/Course Timetable.xlsx", + "url": "https://drive.google.com/uc?id=1DSjRYgofPK2jldKwIsAygz2x8XWlXCK6&export=download" + } + ] + } + } + ], + "trajectory": "trajectories/3a93cae4-ad3e-403e-8c12-65303b271818", + "related_apps": [ + "os", + "libreoffice_calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Course Timetable.xlsx - LibreOffice Calc", + "strict": true } - } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " + ] + } + } + ], + "func": [ + "compare_table", + "compare_table", + "compare_table" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/Course Timetable.xlsx", + "dest": "Course Timetable.xlsx" + } + ], + "expected": [ + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1VMOon8byWuoCW2Uk5etGMJLMzAfwFVyB&export=download", + "dest": "Course Timetable gold.xlsx" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1jAThiIqILZ5t-RFPHVniSvAL8ZJO1H3P&export=download", + "dest": "Course Timetable gold 2.xlsx" + }, + { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1U0THDtPCgsw-Rb0N9fjF8DeOepPeUajP&export=download", + "dest": "Course Timetable gold 3.xlsx" + } + ], + "options": [ + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + }, + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + }, + { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNSheet1", + "sheet_idx1": "ENSheet1", + "ignore_case": true + } + ] + } + ], + "conj": "or" + } } diff --git a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json b/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json deleted file mode 100644 index 28542f8..0000000 --- a/evaluation_examples/examples/multi_apps/767a3271-56db-4745-ac5d-846ef05e6fe5.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "id": "767a3271-56db-4745-ac5d-846ef05e6fe5", - "snapshot": "libreoffice_calc", - "instruction": "Hey there! I've been swamped with emails lately, and I'm trying to get organized. I'm part of a local community group, and we've been receiving a lot of emails about different events and volunteer opportunities. I need to sort through these emails and pull out specific information to keep track of everything. Could you help me extract details from emails that mention 'volunteer opportunities' and organize them into a spreadsheet? I'm looking for the event name, date, location, and contact person's email. It would be a huge help if we could have this info neatly laid out so we can easily see what's coming up and who to contact. Thanks a bunch!", - "source": "authors", - "config": [ - { - } - ], - "trajectory": "trajectories/767a3271-56db-4745-ac5d-846ef05e6fe5", - "related_apps": [ - "thunderbird", - "libreoffice_calc" - ], - "evaluator": { - "postconfig": [], - "func": "", - "result": { - }, - "expected": { - }, - "options": { - } - } -} diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json index deda04e..15f1d0e 100644 --- a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -52,6 +52,29 @@ "image" ], "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "rm", + "-rf", + "/home/user/Desktop/presenter" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "unzip", + "/home/user/Desktop/presenter.zip", + "-d", + "/home/user/Desktop" + ] + } + } + ], "func": "compare_image_list", "result": { "type": "vm_file", From 1af9d8911d3431c928bea9439c3fa442f2028ba0 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Thu, 7 Mar 2024 22:15:23 +0800 Subject: [PATCH 3/8] Update multi-apps examples --- desktop_env/evaluators/metrics/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index d5394e5..b6a98c6 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -48,7 +48,8 @@ from .docs import ( check_tabstops, compare_contains_image, compare_docx_images, - compare_image_text + compare_image_text, + compare_references ) from .general import ( check_csv, From ddcafac30fca5a0b125b1be1c4f50eb73004506a Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 7 Mar 2024 23:16:55 +0800 Subject: [PATCH 4/8] ver Mar7thv2 updated requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 497b9f8..e4ab463 100644 --- a/requirements.txt +++ b/requirements.txt @@ -44,3 +44,4 @@ dashscope google-generativeai PyYaml mutagen +pytesseract From 89f0fc5410e8e84851f8a6152e62309482f7f26a Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Fri, 8 Mar 2024 00:03:08 +0800 Subject: [PATCH 5/8] update multi-apps --- desktop_env/evaluators/getters/__init__.py | 2 +- desktop_env/evaluators/getters/impress.py | 61 ++++++++++++ desktop_env/evaluators/metrics/vscode.py | 13 +++ .../0e5303d4-8820-42f6-b18d-daf7e633de21.json | 95 +++++++++++++++++++ .../236833a3-5704-47fc-888c-4f298f09f799.json | 18 +++- .../3e3fc409-bff3-4905-bf16-c968eee3f807.json | 83 ++++++++++++++++ .../47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json | 89 +++++++++++++++++ .../5df7b33a-9f77-4101-823e-02f863e1c1ae.json | 95 +++++++++++++++++++ .../df67aebb-fb3a-44fd-b75b-51b6012df509.json | 95 +++++++++++++++++++ 9 files changed, 547 insertions(+), 4 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json create mode 100644 evaluation_examples/examples/multi_apps/3e3fc409-bff3-4905-bf16-c968eee3f807.json create mode 100644 evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json create mode 100644 evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json create mode 100644 evaluation_examples/examples/multi_apps/df67aebb-fb3a-44fd-b75b-51b6012df509.json diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index 958d98d..d5c900e 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -29,7 +29,7 @@ from .chrome import ( from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file from .general import get_vm_command_line, get_vm_terminal_output from .gimp import get_gimp_config_file -from .impress import get_audio_in_slide +from .impress import get_audio_in_slide, get_background_image_in_slide from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime from .replay import get_replay diff --git a/desktop_env/evaluators/getters/impress.py b/desktop_env/evaluators/getters/impress.py index d0673dd..f6bc60f 100644 --- a/desktop_env/evaluators/getters/impress.py +++ b/desktop_env/evaluators/getters/impress.py @@ -7,6 +7,67 @@ from typing import Dict from desktop_env.evaluators.getters.file import get_vm_file +def get_background_image_in_slide(env, config: Dict[str, str]): + ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"] + image_id, image_file_path = None, None + + ppt_file_localhost_path = get_vm_file(env, {"path": ppt_file_path, "dest": os.path.split(ppt_file_path)[-1]}) + + with zipfile.ZipFile(ppt_file_localhost_path, 'r') as myzip: + slide1_xml_file = 'ppt/slides/slide{}.xml'.format(slide_index + 1) + # firstly, check whether the background image is used in the slide + if slide1_xml_file not in myzip.namelist(): return None + with myzip.open(slide1_xml_file) as f: + # Parse the XML tree from the relationships file + tree = ET.parse(f) + root = tree.getroot() + bg_tag = "{http://schemas.openxmlformats.org/presentationml/2006/main}bgPr" + image_tag = "{http://schemas.openxmlformats.org/drawingml/2006/main}blip" + attr_tag = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed" + for child in root.iter(bg_tag): + try: + for element in child.iter(image_tag): + image_id = element.attrib[attr_tag] + break + except: pass + if image_id is not None: break + else: return None + + # next, extract the background image from the slide + slide1_rels_file = 'ppt/slides/_rels/slide{}.xml.rels'.format(slide_index + 1) + if slide1_rels_file in myzip.namelist(): + with myzip.open(slide1_rels_file) as f: + # Parse the XML tree from the relationships file + tree = ET.parse(f) + root = tree.getroot() + # Define the namespace used in the relationships file + namespaces = {'r': 'http://schemas.openxmlformats.org/package/2006/relationships'} + # Look for all relationship elements that have a type attribute for image + for rel in root.findall('r:Relationship', namespaces): + # Check if the relationship is for an image file + if 'image' in rel.attrib['Type'] and rel.attrib['Id'] == image_id: + target = rel.attrib['Target'] + if target.startswith('..'): + # Resolve the relative path to get the correct path within the zip file + image_file_path = os.path.normpath(os.path.join('ppt/slides', target)) + # Replace backslashes with forward slashes for ZIP compatibility + image_file_path = image_file_path.replace('\\', '/') + tmpdirname = os.path.dirname(ppt_file_localhost_path) + myzip.extract(image_file_path, tmpdirname) + image_file_path = os.path.join(tmpdirname, image_file_path) + return image_file_path + else: # absolute path + assert target.startswith("file://"), target + image_file_path = target[7:] + break + if image_file_path is None: + return None + + else: + # Get the audio file from vm and return the file path in the host + return get_vm_file(env, {"path": image_file_path, "dest": dest}) + + def get_audio_in_slide(env, config: Dict[str, str]): ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"] diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index a3e6779..e37d04c 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -2,6 +2,7 @@ import copy import importlib.util import json import sys +import re from typing import Dict @@ -86,6 +87,18 @@ def compare_text_file(actual: str, expected: str, **options) -> float: with open(expected) as f2: expected_text = f2.read() + ignore_blanks = options.get('ignore_blanks', False) + if ignore_blanks: + actual_text = re.sub(r'[\t\n]', ' ', actual_text).strip() + actual_text = re.sub(r'\s+', ' ', actual_text) + expected_text = re.sub(r'[\t\n]', ' ', expected_text).strip() + expected_text = re.sub(r'\s+', ' ', expected_text) + + ignore_case = options.get('ignore_case', False) + if ignore_case: + actual_text = actual_text.lower() + expected_text = expected_text.lower() + if actual_text == expected_text: return 1.0 return 0.0 diff --git a/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json b/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json new file mode 100644 index 0000000..abc1284 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json @@ -0,0 +1,95 @@ +{ + "id": "0e5303d4-8820-42f6-b18d-daf7e633de21", + "snapshot": "chrome", + "instruction": "I want to learn python programming and my friend recommends me this course website. I have grabbed the lecture slide for week 0. Please download the PDFs for other weeks into the opened folder and leave the file name as-it-is.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://cs50.harvard.edu/python/2022/weeks/0/" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "mkdir", + "-p", + "/home/user/lecture_slides" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1OdvHgcHXSn62xXe_VrPTN0HLWHmrcfdY&export=download&authuser=0&confirm=t", + "path": "/home/user/lecture_slides/lecture0.pdf" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "nautilus", + "/home/user/lecture_slides" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "os", + "chrome" + ], + "evaluator": { + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "cd /home/user && zip -qr lecture_slides.zip lecture_slides/" + ] + } + } + ], + "func": "compare_archive", + "result": { + "type": "vm_file", + "path": "/home/user/lecture_slides.zip", + "dest": "lecture_slides.zip" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1Ej2iHG8p-QJe7FZQpPIIS82BHOlFAUQM&export=download&authuser=0&confirm=t", + "dest": "gold_lecture_slides.zip" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json b/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json index 3c5f349..43c0cc2 100644 --- a/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json +++ b/evaluation_examples/examples/multi_apps/236833a3-5704-47fc-888c-4f298f09f799.json @@ -1,7 +1,7 @@ { "id": "236833a3-5704-47fc-888c-4f298f09f799", "snapshot": "chrome", - "instruction": "Find daily papers on Huggingface and take down all the titles, authors and the abstracts of papers on 1st March, 2024 in the doc file 'paper_reading_2024_03_01.docx' on desktop. Each paragraph (split by empty lines) conforms to the following format:\nTitle: xxx\nAuthors: xxx, xxx, xxx\nAbstract: xxxxxxxx.\nArxiv PDF: https://xxxx.pdf", + "instruction": "Find the daily paper list on Huggingface and take down the meta information of papers on 1st March, 2024 in the opened .docx file. I have recorded two papers. Please conform to the format and complete others.", "source": "authors", "config": [ { @@ -31,12 +31,24 @@ ] } }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1WEDfILO-NijZBGArZ3ovO1933uHeOi1A&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/paper_reading_2024_03_01.docx" + } + ] + } + }, { "type": "launch", "parameters": { "command": [ "libreoffice", - "--writer" + "--writer", + "/home/user/Desktop/paper_reading_2024_03_01.docx" ] } } @@ -55,7 +67,7 @@ }, "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1TUTihXD93bIlekuYy_44fmXAhI1KVol4&export=download&authuser=0&confirm=t", + "path": "https://drive.usercontent.google.com/download?id=1wb0sQnVDCAz8sS49kO8boJIa1kqI5mx0&export=download&authuser=0&confirm=t", "dest": "gold_paper_reading_2024_03_01.docx" }, "options": { diff --git a/evaluation_examples/examples/multi_apps/3e3fc409-bff3-4905-bf16-c968eee3f807.json b/evaluation_examples/examples/multi_apps/3e3fc409-bff3-4905-bf16-c968eee3f807.json new file mode 100644 index 0000000..bff6446 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/3e3fc409-bff3-4905-bf16-c968eee3f807.json @@ -0,0 +1,83 @@ +{ + "id": "3e3fc409-bff3-4905-bf16-c968eee3f807", + "snapshot": "chrome", + "instruction": "I'm a huge movie fan and have kept a record of all the movies I've watched. I'm curious to find out if there are any films released before 2024 from the IMDB Top 30 list that I haven't seen yet. Help me create another sheet 'unseen_movies' in the opened Excel. This sheet should share the same headers and sort the results according to IMDB rankings from high to low.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://www.imdb.com" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1KVNVf5qZhprV_7rgEl33Qrkagv603reM&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/movies.xlsx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--calc", + "/home/user/Desktop/movies.xlsx" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_calc", + "chrome" + ], + "evaluator": { + "func": "compare_table", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/movies.xlsx", + "dest": "movies.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=149QKswQ8AIYk21Aaatic6QSCcBU40uyd&export=download&authuser=0&confirm=t", + "dest": "gold_movies.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": "RNunseen_movies", + "sheet_idx1": "ENunseen_movies" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json new file mode 100644 index 0000000..b778489 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json @@ -0,0 +1,89 @@ +{ + "id": "47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5", + "snapshot": "vlc", + "instruction": "The landscape at 00:08 in this video is so beautiful. Please extract this frame and set it as the background of the second page of the opened slides.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1-De7kW-JOEiwlZXfLG7bSTpycUz7zw_Y&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/landscape.mp4" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1v7J3uaxjM5wSz8xidcV0p7tMfkJCHw3_&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/Robotic_Workshop_Infographics.pptx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Robotic_Workshop_Infographics.pptx" + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "vlc", + "--repeat", + "/home/user/Desktop/landscape.mp4" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_impress", + "vlc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Robotic_Workshop_Infographics.pptx - LibreOffice Impress", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_images", + "result": { + "type": "background_image_in_slide", + "ppt_file_path": "/home/user/Desktop/Robotic_Workshop_Infographics.pptx", + "slide_index": 1, + "dest": "landscape.png" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=17Dt8EalJj9ksbfuj6dowY3OpDqcK80QZ&export=download&authuser=0&confirm=t", + "dest": "gold_landscape.png" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json b/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json new file mode 100644 index 0000000..64bdee1 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json @@ -0,0 +1,95 @@ +{ + "id": "df67aebb-fb3a-44fd-b75b-51b6012df509", + "snapshot": "vscode", + "instruction": "I am writing my paper thesis. I have listed all referenced papers in the opened docx. But my mentor asked me to use latex instead of word writer. So could you help me export the dblp bibtex of these papers into 'references.bib'. By the way, if the paper is published, do not use the arxiv version. Separate each bibtex dict with a blank line for clarity.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://dblp.org/" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "touch", + "/home/user/Desktop/references.bib" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "code", + "/home/user/Desktop/references.bib" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1NDYJ7XyiUVJYuMBDAYvObrSZdODrhUMi&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/references.docx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer", + "/home/user/Desktop/references.docx" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome", + "libreoffice_writer" + ], + "evaluator": { + "func": "compare_text_file", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/references.bib", + "dest": "references.bib" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1eeaPRSAWhzsNGFuda_u6phsdQMZHBkb2&export=download&authuser=0&confirm=t", + "dest": "gold_references.bib" + }, + "options": { + "ignore_blanks": true + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/df67aebb-fb3a-44fd-b75b-51b6012df509.json b/evaluation_examples/examples/multi_apps/df67aebb-fb3a-44fd-b75b-51b6012df509.json new file mode 100644 index 0000000..64bdee1 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/df67aebb-fb3a-44fd-b75b-51b6012df509.json @@ -0,0 +1,95 @@ +{ + "id": "df67aebb-fb3a-44fd-b75b-51b6012df509", + "snapshot": "vscode", + "instruction": "I am writing my paper thesis. I have listed all referenced papers in the opened docx. But my mentor asked me to use latex instead of word writer. So could you help me export the dblp bibtex of these papers into 'references.bib'. By the way, if the paper is published, do not use the arxiv version. Separate each bibtex dict with a blank line for clarity.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "chrome_open_tabs", + "parameters": { + "urls_to_open": [ + "https://dblp.org/" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "touch", + "/home/user/Desktop/references.bib" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "code", + "/home/user/Desktop/references.bib" + ] + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1NDYJ7XyiUVJYuMBDAYvObrSZdODrhUMi&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/references.docx" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer", + "/home/user/Desktop/references.docx" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "chrome", + "libreoffice_writer" + ], + "evaluator": { + "func": "compare_text_file", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/references.bib", + "dest": "references.bib" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1eeaPRSAWhzsNGFuda_u6phsdQMZHBkb2&export=download&authuser=0&confirm=t", + "dest": "gold_references.bib" + }, + "options": { + "ignore_blanks": true + } + } +} \ No newline at end of file From b8d54e8bac6d3f84ff94d27e06a7f088becf4120 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Fri, 8 Mar 2024 11:47:14 +0800 Subject: [PATCH 6/8] Fix a bug in Chrome evaluator --- desktop_env/evaluators/getters/chrome.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py index 8bd5842..2a6a9ea 100644 --- a/desktop_env/evaluators/getters/chrome.py +++ b/desktop_env/evaluators/getters/chrome.py @@ -589,6 +589,10 @@ def get_active_url_from_accessTree(env, config): if len(elements) == 0: print("no elements found") return None + elif elements[-1].text is None: + print("no text found") + return None + active_tab_url = config["goto_prefix"] + elements[0].text if "goto_prefix" in config.keys() else "https://" + \ elements[0].text print("active tab url now: {}".format(active_tab_url)) From ce23f3dab484c19f4aac6b3f020435262d55d64f Mon Sep 17 00:00:00 2001 From: David Chang Date: Fri, 8 Mar 2024 13:28:34 +0800 Subject: [PATCH 7/8] ver Mar8th fixed a task and a metric --- desktop_env/evaluators/metrics/table.py | 6 +-- desktop_env/evaluators/metrics/utils.py | 7 ++- .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 45 ++++++++++--------- 3 files changed, 33 insertions(+), 25 deletions(-) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 4a58e50..0a03295 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -194,7 +194,7 @@ def compare_table(result: str, expected: str = None, **options) -> float: # sheet_idx1: as sheet_idx0 # rules: list of dict, each dict is like # { "range": ["A1:B6", "C2:E5"], - # "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1 + # "type": "includes" | "included_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1 # "threshold": 85, // for fuzzy match # "ignore_case": true | false, # "ignore_chars": " ()", # filtered out @@ -232,9 +232,9 @@ def compare_table(result: str, expected: str = None, **options) -> float: value2 = value2.lower() if rl["type"]=="includes": - metric: bool = value1 in value2 - elif rl["type"]=="includes_by": metric: bool = value2 in value1 + elif rl["type"]=="included_by": + metric: bool = value1 in value2 elif rl["type"]=="fuzzy_match": metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.) elif rl["type"]=="exact_match": diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index b57de00..d1530b9 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -274,7 +274,8 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s # }}} function load_pivot_tables # -_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping) +_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si", namespaces=_xlsx_ns_mapping) +_shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx_ns_mapping) def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: @@ -285,7 +286,9 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any: with z_f.open("xl/sharedStrings.xml") as f: shared_str_xml: _Element = lxml.etree.fromstring(f.read()) str_elements: List[_Element] = _shared_str_selector(shared_str_xml) - shared_strs: List[str] = [elm.text for elm in str_elements] + shared_strs: List[str] = [ "".join(t.text for t in _shared_str_value_selector(elm))\ + for elm in str_elements + ] except: logger.debug("Read shared strings error: %s", xlsx_file) diff --git a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json index 5d84224..e9b0278 100644 --- a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json +++ b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json @@ -64,35 +64,40 @@ "func": "compare_table", "result": { "type": "vm_file", - "path": [ - "/home/user/authors.xlsx", - "/home/user/authors-Sheet1.csv" - ], - "dest": [ - "authors.xlsx", - "authors-Sheet1.csv" - ], - "multi": true + "path": "/home/user/authors.xlsx", + "dest": "authors.xlsx" }, "expected": { "type": "cloud_file", - "path": [ - "https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download", - "https://drive.google.com/uc?id=1fq4hbk1g9R_SjknzwFAqvyF1ICyNYfok&export=download" - ], - "dest": [ - "authors-gt.xlsx", - "authors-gt-Sheet1.csv" - ], - "multi": true + "path": "https://drive.google.com/uc?id=1fttbvfHuoQfsQUk3fVXkJsCu231jhnQj&export=download", + "dest": "authors-gt.xlsx" }, "options": { "rules": [ { - "type": "sheet_print", + "type": "sheet_fuzzy", "sheet_idx0": "RNSheet1", "sheet_idx1": "ENSheet1", - "ignore_case": true + "rules": [ + { + "range": ["A1:C1"], + "type": "includes", + "ignore_case": true + }, + { + "range": ["A2:B5"], + "type": "exact_match", + "trim_leadings": " ", + "trim_trailings": " " + }, + { + "range": ["C2:C5"], + "type": "exact_match", + "trim_leadings": " ", + "trim_trailings": " ", + "ignore_case": true + } + ] } ] } From 8df2233730956a5e44c5fb696ec579111a0edbc3 Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Fri, 8 Mar 2024 19:25:51 +0800 Subject: [PATCH 8/8] add multi-turn examples (in total, add 12 examples by ruisheng.cao 2024-03-08) --- .../5df7b33a-9f77-4101-823e-02f863e1c1ae.json | 88 ++++++++----------- .../aceb0368-56b8-4073-b70e-3dc9aee184e0.json | 87 ++++++++++++++++++ .../c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json | 58 ++++++++++++ 3 files changed, 180 insertions(+), 53 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/aceb0368-56b8-4073-b70e-3dc9aee184e0.json create mode 100644 evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json diff --git a/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json b/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json index 64bdee1..e00b198 100644 --- a/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json +++ b/evaluation_examples/examples/multi_apps/5df7b33a-9f77-4101-823e-02f863e1c1ae.json @@ -1,33 +1,17 @@ { - "id": "df67aebb-fb3a-44fd-b75b-51b6012df509", - "snapshot": "vscode", - "instruction": "I am writing my paper thesis. I have listed all referenced papers in the opened docx. But my mentor asked me to use latex instead of word writer. So could you help me export the dblp bibtex of these papers into 'references.bib'. By the way, if the paper is published, do not use the arxiv version. Separate each bibtex dict with a blank line for clarity.", + "id": "5df7b33a-9f77-4101-823e-02f863e1c1ae", + "snapshot": "libreoffice_writer", + "instruction": "I enjoy reading during my spare time, but this book is too bulky. Each time I open it, I have to find where I left off, which is a hassle. I'd like to divide the book into several PDFs, each containing a different chapter, and name them following the example I set with chapter one.", "source": "authors", "config": [ { - "type": "launch", + "type": "download", "parameters": { - "command": [ - "google-chrome", - "--remote-debugging-port=1337" - ] - } - }, - { - "type": "launch", - "parameters": { - "command": [ - "socat", - "tcp-listen:9222,fork", - "tcp:localhost:1337" - ] - } - }, - { - "type": "chrome_open_tabs", - "parameters": { - "urls_to_open": [ - "https://dblp.org/" + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1iaPADtkbDGbupuwNQiG-L9Topt_VJl8g&export=download&authuser=0&confirm=t", + "path": "/home/user/Desktop/book.zip" + } ] } }, @@ -35,8 +19,9 @@ "type": "execute", "parameters": { "command": [ - "touch", - "/home/user/Desktop/references.bib" + "/bin/bash", + "-c", + "unzip -q /home/user/Desktop/book.zip -d /home/user/Desktop && rm /home/user/Desktop/book.zip" ] } }, @@ -44,52 +29,49 @@ "type": "launch", "parameters": { "command": [ - "code", - "/home/user/Desktop/references.bib" + "nautilus", + "/home/user/Desktop/book" ] } }, { - "type": "download", + "type": "open", "parameters": { - "files": [ - { - "url": "https://drive.usercontent.google.com/download?id=1NDYJ7XyiUVJYuMBDAYvObrSZdODrhUMi&export=download&authuser=0&confirm=t", - "path": "/home/user/Desktop/references.docx" - } - ] - } - }, - { - "type": "launch", - "parameters": { - "command": [ - "libreoffice", - "--writer", - "/home/user/Desktop/references.docx" - ] + "path": "/home/user/Desktop/book/Spectral Graph Theory.pdf" } } ], "trajectory": "trajectories/", "related_apps": [ - "chrome", + "os", "libreoffice_writer" ], "evaluator": { - "func": "compare_text_file", + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "cd /home/user/Desktop/book && zip -qr book.zip *.pdf" + ] + } + } + ], + "func": "compare_archive", "result": { "type": "vm_file", - "path": "/home/user/Desktop/references.bib", - "dest": "references.bib" + "path": "/home/user/Desktop/book/book.zip", + "dest": "book.zip" }, "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1eeaPRSAWhzsNGFuda_u6phsdQMZHBkb2&export=download&authuser=0&confirm=t", - "dest": "gold_references.bib" + "path": "https://drive.usercontent.google.com/download?id=1oH9UAbJe4EmPxwDX4kQPVyKqGF2y2pQh&export=download&authuser=0&confirm=t", + "dest": "gold_book.zip" }, "options": { - "ignore_blanks": true + "file_type": "pdf" } } } \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/aceb0368-56b8-4073-b70e-3dc9aee184e0.json b/evaluation_examples/examples/multi_apps/aceb0368-56b8-4073-b70e-3dc9aee184e0.json new file mode 100644 index 0000000..5d3394e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/aceb0368-56b8-4073-b70e-3dc9aee184e0.json @@ -0,0 +1,87 @@ +{ + "id": "aceb0368-56b8-4073-b70e-3dc9aee184e0", + "snapshot": "libreoffice_calc", + "instruction": "I am grading students' English exam papers, but the test consists only of multiple-choice questions. It's too exhausting to check each question one by one and record the detailed scores. Can you help me compare the remaining students' answers with the answer key and record the detailed scoring in the opened spreadsheet?", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1CfzBZxhhV1_vUKhxsP_4M574-XFm1qIo&export=download&authuser=0&confirm=t", + "path": "/home/user/exam.zip" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "unzip", + "/home/user/exam.zip", + "-d", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--writer", + "/home/user/exam/ReferenceAnswers.docx" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "libreoffice", + "--calc", + "/home/user/exam/grades.xlsx" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "nautilus", + "/home/user/exam/" + ] + + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "os" + ], + "evaluator": { + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?id=1a0gZwixcJuZAUtqGR3L5P20JmoWfQU_d&export=download&authuser=0&confirm=t", + "dest": "gold_grades.xlsx" + }, + "result": { + "type": "vm_file", + "path": "/home/user/exam/grades.xlsx", + "dest": "grades.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json b/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json new file mode 100644 index 0000000..dcd790e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/c2751594-0cd5-4088-be1b-b5f2f9ec97c4.json @@ -0,0 +1,58 @@ +{ + "id": "c2751594-0cd5-4088-be1b-b5f2f9ec97c4", + "snapshot": "thunderbird", + "instruction": "Help me export the first image from the doc file attached in the most recent email in Notes folder, and set this image as the new desktop background.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1JcDjBGBP5ly90pdVLs3ySoe6qX9Ht_d6&export=download&authuser=0&confirm=t", + "path": "/home/user/thunderbird-profile.tar.gz" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "tar", + "--recursive-unlink", + "-xz", + "-f", + "/home/user/thunderbird-profile.tar.gz", + "-C", + "/home/user/" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "/usr/bin/thunderbird" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "thunderbird", + "os" + ], + "evaluator": { + "func": "compare_images", + "result": { + "type": "vm_wallpaper", + "dest": "background.png" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1iIdjmXD6fRSYtpqxujTGzx8agvIgXsqz&export=download&authuser=0&confirm=t", + "dest": "gold_background.png" + } + } +} \ No newline at end of file