From 69ef653a7c35049a877370d54023f9fe356534bd Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Tue, 5 Mar 2024 22:46:56 +0800 Subject: [PATCH] update multi apps --- desktop_env/evaluators/metrics/__init__.py | 9 +- desktop_env/evaluators/metrics/chrome.py | 33 +++++++ desktop_env/evaluators/metrics/docs.py | 23 +++++ desktop_env/evaluators/metrics/vscode.py | 29 +++++++ .../20236825-b5df-46e7-89bf-62e1d640a897.json | 58 +++++++++++++ .../227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json | 69 +++++++++++++++ .../8df7e444-8e06-4f93-8a1a-c5c974269d82.json | 47 ++++++++++ .../a503b07f-9119-456b-b75d-f5146737d24f.json | 47 ++++++++++ .../b337d106-053f-4d37-8da0-7f9c4043a66b.json | 85 +++++++++++++++++++ 9 files changed, 397 insertions(+), 3 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json create mode 100644 evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json create mode 100644 evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json create mode 100644 evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json create mode 100644 evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 527dbe5..c965a95 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -18,7 +18,8 @@ from .chrome import ( is_expected_search_query, is_expected_active_tab, is_expected_url_pattern_match, - is_added_to_steam_cart + is_added_to_steam_cart, + compare_pdf_images ) from .docs import ( compare_font_names, @@ -45,7 +46,8 @@ from .docs import ( is_first_line_centered, check_file_exists, check_tabstops, - compare_contains_image + compare_contains_image, + compare_docx_images ) from .general import ( check_csv, @@ -125,7 +127,8 @@ from .vscode import ( 
check_json_settings, check_json_keybindings, check_python_file_by_test_suite, - check_python_file_by_gold_file + check_python_file_by_gold_file, + compare_zip_files ) diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py index 0a76bf0..3c367b3 100644 --- a/desktop_env/evaluators/metrics/chrome.py +++ b/desktop_env/evaluators/metrics/chrome.py @@ -129,6 +129,39 @@ def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[st logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}") return score / len(pdf2_path) +import fitz +from PIL import Image +from io import BytesIO + +def compare_pdf_images(pdf1_path: str, pdf2_path: str, **kwargs) -> float: + """Return 1. when both PDFs reference the same number of images and each pair decodes to identical bytes, else 0.""" + def extract_images_from_pdf(pdf_path): + pdf_document = fitz.open(pdf_path) + images = [] + for page_number in range(pdf_document.page_count): + page = pdf_document[page_number] + image_list = page.get_images(full=True) + # Each entry is a tuple whose first item is the image xref, which is what extract_image() expects. + for img_info in image_list: + base_image = pdf_document.extract_image(img_info[0]) + image_bytes = base_image["image"] + + images.append(BytesIO(image_bytes)) + + return images + + images1 = extract_images_from_pdf(pdf1_path) + images2 = extract_images_from_pdf(pdf2_path) + + if len(images1) != len(images2): + return 0. + + for img1, img2 in zip(images1, images2): + if Image.open(img1).tobytes() != Image.open(img2).tobytes(): + return 0. + + return 1. 
+ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float: """ diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index a17da39..b44df9c 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -159,6 +159,29 @@ def compare_docx_tables(docx_file1, docx_file2): return 1 +from io import BytesIO +from PIL import Image + +def compare_docx_images(docx_file1, docx_file2): + doc1 = Document(docx_file1) + doc2 = Document(docx_file2) + + def extract_images(doc): + images = [] + for rel in doc.part.rels.values(): + if "image" in rel.reltype: + img_data = rel.target_part.blob + images.append(BytesIO(img_data)) + return images + + images1 = extract_images(doc1) + images2 = extract_images(doc2) + if len(images1) != len(images2): + return 0 + for img1, img2 in zip(images1, images2): + if Image.open(img1).tobytes() != Image.open(img2).tobytes(): + return 0 + return 1 def compare_line_spacing(docx_file1, docx_file2): if not compare_docx_files(docx_file1, docx_file2): diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index ecf4e10..a3e6779 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -90,6 +90,35 @@ def compare_text_file(actual: str, expected: str, **options) -> float: return 1.0 return 0.0 +import zipfile + +def compare_zip_files(actual: str, expected: str, **options) -> float: + """ + Args: + actual (str): path to result zip file + expected (str): path to gold zip file + + Return: + float: the score + """ + if not actual: + return 0. 
+ + with zipfile.ZipFile(actual, 'r') as zip_file1, zipfile.ZipFile(expected, 'r') as zip_file2: + file_list1 = set(zip_file1.namelist()) + file_list2 = set(zip_file2.namelist()) + + if file_list1 != file_list2: + return 0.0 + + for file_name in file_list1: + content1 = zip_file1.read(file_name) + content2 = zip_file2.read(file_name) + + if content1 != content2: + return 0.0 + return 1.0 + def compare_config(actual: str, rules: Dict, **options) -> float: if not actual: diff --git a/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json new file mode 100644 index 0000000..550755b --- /dev/null +++ b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json @@ -0,0 +1,58 @@ +{ + "id": "20236825-b5df-46e7-89bf-62e1d640a897", + "snapshot": "vscode", + "instruction": "I am coding on my algorithm practice. The doc \"Bubble_Sort_tutorial.docx\" is the document for it. 
Help me finish the function 'bubbleSort' in 'bubbleSort.py' on the Desktop and save the output in 'res.txt' on the Desktop.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1AQtZ8Hrf7WtyUtaHMtDN-UErKRXnW64d&export=download&authuser=0&confirm=t&uuid=bf7f2b4b-ecf9-4260-b74e-db0cd40b58ca&at=APZUnTVimJzbmwJ8-4E1lq9ipJf_:1709624149129", + "path": "/home/user/Desktop/bubbleSort.zip" + }, + { + "url": "https://drive.usercontent.google.com/download?id=168ZHCnK6v5PEZ8G5M25ZUW9fICk4OlfE&export=download&authuser=0&confirm=t&uuid=3642df08-dc40-4d37-93a0-8532e3012fb0&at=APZUnTUP1OTlq0kIgqcj7YSWw6MB:1709622592489", + "path": "/home/user/Desktop/Bubble_Sort_tutorial.docx" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "/bin/bash", + "-c", + "unzip /home/user/Desktop/bubbleSort.zip -d /home/user/Desktop/ && rm -rf /home/user/Desktop/bubbleSort.zip" + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/Bubble_Sort_tutorial.docx" + } + } + ], + "trajectory": "trajectories/20236825-b5df-46e7-89bf-62e1d640a897", + "related_apps": [ + "vscode", + "libreoffice_writer", + "os" + ], + "evaluator": { + "func": "compare_text_file", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1g2Trt9oxQyW_sx8aIztFA0zNsE4yNw2x&export=download&authuser=0&confirm=t&uuid=342751c4-54f1-4760-9326-e7388845ded0&at=APZUnTV5BcbaxIZrDglWbs84Oxln:1709623697315", + "dest": "res.txt" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/res.txt", + "dest": "res.txt" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json new file mode 100644 index 0000000..f186383 --- /dev/null +++ 
b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json @@ -0,0 +1,69 @@ +{ + "id": "227d2f97-562b-4ccb-ae47-a5ec9e142fbb", + "snapshot": "gimp", + "instruction": "I have my .xcf file saved on Desktop. Could you help me copy the image and paste it into a Libreoffice Writer file? Save it as 'image.docx' on the Desktop.", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "path": "/home/user/Desktop/QTdHniCqfJbBLJe3L3nijU-1200-80.xcf", + "url": "https://drive.usercontent.google.com/download?id=1BGoDOu9bYIG7Twj5dVTxRIWDUgWzzDtP&export=download&authuser=0&confirm=t&uuid=235d1bb2-37a0-4d96-82bf-a87f31d03cb4&at=APZUnTX53EdR1stASFS3OH5luAtB:1709617456061" + } + ] + } + } + ], + "trajectory": "trajectories/227d2f97-562b-4ccb-ae47-a5ec9e142fbb", + "related_apps": [ + "libreoffice_writer", + "gimp", + "os" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "image.docx - LibreOffice Writer", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 1.0 + } + } + ], + "func": "compare_docx_images", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/image.docx", + "dest": "image.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=11kWQc1XFEqcIMuW0-NnZRSdv1199OmVI&export=download&authuser=0&confirm=t&uuid=694676fd-1ac9-4501-8acf-f48018494c7f&at=APZUnTV-koL51ka5dHum_HpGywv_:1709618406292", + "dest": "image.docx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json 
b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json new file mode 100644 index 0000000..12263d2 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json @@ -0,0 +1,47 @@ +{ + "id": "8df7e444-8e06-4f93-8a1a-c5c974269d82", + "snapshot": "libreoffice_writer", + "instruction": "In the \"reminder.docx\" on Desktop is the submission instruction of our essay work. My essay is saved as docx file in /home/user. Please help me prepare the files for submission as required. ", + "source": "authors", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1wrCJd2f0xYnrHcj6KDzCe96X9JsN3VI3&export=download&authuser=0&confirm=t&uuid=277cb94d-1981-4f4d-b1ba-bceac8146001&at=APZUnTWKU5DBnr_6-_ZlEdsvhpCz:1709633482673", + "path": "/home/user/Recruitment_and_retention_of_health_professionals_across_Europe.docx" + }, + { + "url": "https://drive.usercontent.google.com/download?id=1vzawJthEhQHcK4cUF0W9QT4zaFywO6aH&export=download&authuser=0&confirm=t&uuid=0fbb6a50-f9c1-44d2-b430-3af738d5fadc&at=APZUnTWyfv-N5f-EjnF8ob-VfCsD:1709633450986", + "path": "/home/user/Desktop/reminder.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/reminder.docx" + } + } + ], + "trajectory": "trajectories/8df7e444-8e06-4f93-8a1a-c5c974269d82", + "related_apps": [ + "libreoffice_writer", + "os" + ], + "evaluator": { + "func": "compare_zip_files", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1IKRu-dMFP4Aqzq5-4TOmOWVci0qvC27K&export=download&authuser=0&confirm=t&uuid=e2dabad2-5648-4bc3-a40f-f008089cd613&at=APZUnTVh5JD5nT3EvutwHIaSnJAT:1709633945616", + "dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Recruitment_and_retention_of_health_professionals_across_Europe.zip", + 
"dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json b/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json new file mode 100644 index 0000000..f6f506f --- /dev/null +++ b/evaluation_examples/examples/multi_apps/a503b07f-9119-456b-b75d-f5146737d24f.json @@ -0,0 +1,47 @@ +{ + "id": "a503b07f-9119-456b-b75d-f5146737d24f", + "snapshot": "os", + "instruction": "I am preparing for my paper reading report. Could you help me download this image at https://github.com/xlang-ai/OpenAgents/blob/main/pics/openagents_overview.png and convert it to PDF format. Save it as \"openagents_overview.pdf\" on desktop.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + } + ], + "trajectory": "trajectories/a503b07f-9119-456b-b75d-f5146737d24f", + "related_apps": [ + "os", + "chrome", + "gimp", + "pdf" + ], + "evaluator": { + "func": "compare_pdfs", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1gD1odFNuLT6TP6rpAv_hot86pKcI5wY1&export=download&authuser=0&confirm=t&uuid=9d17c689-95d3-45e4-b093-0165de4045b4&at=APZUnTV9SssKQCoYyYeVsi8e9zcX:1709649491796", + "dest": "openagents_overview.pdf" + }, + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/openagents_overview.pdf", + "dest": "openagents_overview.pdf" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json new file mode 100644 index 0000000..9289558 --- /dev/null +++ 
b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json @@ -0,0 +1,85 @@ +{ + "id": "b337d106-053f-4d37-8da0-7f9c4043a66b", + "snapshot": "os", + "instruction": "I've recently wanted to try using the Vim editor to edit code, but my Vim editor doesn't show relative line numbers by default. Please search the internet for a tutorial on how to display relative line numbers and make it the default setting for my local Vim.", + "source": "authors", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "google-chrome", + "--remote-debugging-port=1337" + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; time.sleep(0.5); pyautogui.click(960, 540); time.sleep(0.5); pyautogui.hotkey('ctrl', 'alt', 't'); time.sleep(0.5)" + ] + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Terminal" + } + } + ], + "trajectory": "trajectories/b337d106-053f-4d37-8da0-7f9c4043a66b", + "related_apps": [ + "os", + "chrome" + ], + "evaluator": { + "postconfig": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1CyhWjUS2oov4Fzc0VRwTh6LiS2Qu-T_8&export=download&authuser=0&confirm=t&uuid=9d0e2c62-895c-4bb3-a057-30cae60329ed&at=APZUnTVngSwARjYsWSmhSyHAqwID:1709647023362", + "path": "eval.sh" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": "chmod +x eval.sh", + "shell": true + } + } + ], + "func": "check_include_exclude", + "result": { + "type": "vm_command_line", + "command": "bash eval.sh", + "shell": true + }, + "expected": { + "type": "rule", + "rules": { + "include": [ + "The File Has Set Number!" + ], + "exclude": [] + } + } + } +} \ No newline at end of file