From 5ebd080237898843157b8b77263781faff354c67 Mon Sep 17 00:00:00 2001 From: BlankCheng <913501223@qq.com> Date: Wed, 6 Mar 2024 21:33:38 +0800 Subject: [PATCH] Update multi-app examples --- desktop_env/evaluators/metrics/__init__.py | 10 +- desktop_env/evaluators/metrics/gimp.py | 102 ++++++++++++++---- desktop_env/evaluators/metrics/vscode.py | 51 +++++++++ .../d16c99dc-2a1e-46f2-b350-d97c86c85c15.json | 32 ++++-- .../3c8f201a-009d-4bbe-8b65-a6f8b35bb57f.json | 37 +++++++ .../42f4d1c7-4521-4161-b646-0a8934e36081.json | 71 ++++++++++++ .../7f35355e-02a6-45b5-b140-f0be698bcf85.json | 43 ++++++++ .../91190194-f406-4cd6-b3f9-c43fac942b22.json | 51 +++++++++ .../98e8e339-5f91-4ed2-b2b2-12647cb134f4.json | 54 ++++++++++ .../bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108.json | 69 ++++++++++++ .../d68204bf-11c1-4b13-b48b-d303c73d4bf6.json | 51 +++++++++ .../e8172110-ec08-421b-a6f5-842e6451911f.json | 67 ++++++++++++ 12 files changed, 606 insertions(+), 32 deletions(-) create mode 100644 evaluation_examples/examples/multi_apps/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f.json create mode 100644 evaluation_examples/examples/multi_apps/42f4d1c7-4521-4161-b646-0a8934e36081.json create mode 100644 evaluation_examples/examples/multi_apps/7f35355e-02a6-45b5-b140-f0be698bcf85.json create mode 100644 evaluation_examples/examples/multi_apps/91190194-f406-4cd6-b3f9-c43fac942b22.json create mode 100644 evaluation_examples/examples/multi_apps/98e8e339-5f91-4ed2-b2b2-12647cb134f4.json create mode 100644 evaluation_examples/examples/multi_apps/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108.json create mode 100644 evaluation_examples/examples/multi_apps/d68204bf-11c1-4b13-b48b-d303c73d4bf6.json create mode 100644 evaluation_examples/examples/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f.json diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 527dbe5..6d49742 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ 
b/desktop_env/evaluators/metrics/__init__.py @@ -64,7 +64,7 @@ from .gimp import ( check_brightness_decrease_and_structure_sim, check_contrast_increase_and_structure_sim, check_saturation_increase_and_structure_sim, - check_image_size_and_structure_sim, + check_image_size, check_image_mirror, check_palette_and_structure_sim, check_textbox_on_leftside, @@ -77,7 +77,9 @@ from .gimp import ( increase_saturation, decrease_brightness, check_file_exists, - compare_triangle_positions + compare_triangle_positions, + check_sharper, + check_image_file_size ) from .libreoffice import check_libre_locale from .pdf import check_pdf_pages @@ -121,11 +123,13 @@ from .vscode import ( compare_text_file, compare_config, compare_answer, + compare_result_files, is_extension_installed, check_json_settings, check_json_keybindings, check_python_file_by_test_suite, - check_python_file_by_gold_file + check_python_file_by_gold_file, + check_html_background_image ) diff --git a/desktop_env/evaluators/metrics/gimp.py b/desktop_env/evaluators/metrics/gimp.py index 30c9b68..e46f5d5 100644 --- a/desktop_env/evaluators/metrics/gimp.py +++ b/desktop_env/evaluators/metrics/gimp.py @@ -5,7 +5,7 @@ from PIL import Image, ImageChops, ImageStat def compare_image_list(pred_img_path_list: Union[str, List[str]], - gold_img_path_list: Union[str, List[str]]) -> float: + gold_img_path_list: Union[str, List[str]]) -> float: """ Compare two image lists, only if all images are the same, return 1.0, otherwise return 0.0 """ if type(pred_img_path_list) != list: @@ -177,6 +177,16 @@ def calculate_contrast(image): return np.std(pixels) +def calculate_image_sharpness(image_path): + # Load the image in grayscale + image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) + # Apply the Laplacian operator + laplacian = cv2.Laplacian(image, cv2.CV_64F) + # Calculate the variance + variance = np.var(laplacian) + return variance + + def structure_check_by_mse(img1, img2, threshold=0.03): """Check if two images are approximately 
the same by MSE""" mse = np.mean( @@ -295,7 +305,8 @@ def check_triangle_position(tgt_path): # We assume the triangle is a different color from the background # Find the unique colors - unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0, return_counts=True) + unique_colors, counts = np.unique(img_array.reshape(-1, img_array.shape[2]), axis=0, + return_counts=True) unique_colors_sorted = unique_colors[np.argsort(counts)] # Assuming the background is the most common color and the triangle is a different color @@ -337,6 +348,25 @@ def check_structure_sim(src_path, tgt_path): return structure_same +def check_structure_sim_resized(src_path, tgt_path): + """ + Check if the structure of the two images are similar after resizing. + gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15 + """ + if src_path is None or tgt_path is None: + return 0. + + img_src = Image.open(src_path) + img_tgt = Image.open(tgt_path) + + # Resize the images to the same size + img_src = img_src.resize(img_tgt.size) + + # Check if the structure is similar + structure_same = structure_check_by_ssim(img_src, img_tgt) + return structure_same + + def check_contrast_increase_and_structure_sim(src_path, tgt_path): """ Check if the src image has higher contrast than the tgt image and the structures are similar @@ -388,34 +418,28 @@ def check_config_status(actual_config_path, rule): return 0. -def check_image_size_and_structure_sim(src_path, tgt_path, height=512, width=None): +def check_image_size(src_path, rule): """ - Check if the size of the src image is correct and the structure of the two images are similar. - gimp:d16c99dc-2a1e-46f2-b350-d97c86c85c15 + Check if the size of the src image is correct + multi-apps:42f4d1c7-4521-4161-b646-0a8934e36081 """ - - if src_path is None or tgt_path is None: + if src_path is None: return 0. 
- # Load images - source_image = Image.open(src_path) - target_image = Image.open(tgt_path) + # Load the image + img = Image.open(src_path) - # Check size - if width is not None: - width_same = source_image.size[0] == width - else: - width_same = True - if height is not None: - height_same = source_image.size[1] == height + # Check the size + if rule["height"] is not None: + height_same = img.size[1] == rule["height"] else: height_same = True + if rule["width"] is not None: + width_same = img.size[0] == rule["width"] + else: + width_same = True - # Check structure - resized_target_image = target_image.resize(source_image.size) - structure_same = structure_check_by_ssim(source_image, resized_target_image) - - if width_same and height_same and structure_same: + if height_same and width_same: return 1. else: return 0. @@ -521,6 +545,31 @@ def check_green_background(src_path, tgt_path): return 1. +def check_sharper(src_path, tgt_path): + """ + Check if the source image is sharper than the target image. 
+ multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108 + """ + sharpness_src = calculate_image_sharpness(src_path) + sharpness_tgt = calculate_image_sharpness(tgt_path) + return 1.0 if sharpness_src > sharpness_tgt else 0.0 + + +def check_image_file_size(src_path, rule): + """ + Check if the file size of the src image (in bytes) is strictly below rule["max_size"] + """ + if src_path is None: + return 0.0 + + # Check the size + file_size = os.path.getsize(src_path) + if file_size < rule["max_size"]: + return 1.0 + else: + return 0.0 + + if __name__ == "__main__": actual_config_path = "../../../cache/sessionrc_test" rule = { @@ -550,3 +599,12 @@ if __name__ == "__main__": tgt_path = "../../../cache/f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce/Triangle_In_The_Middle.png" print(check_triangle_position(tgt_path)) + src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi_sharper.png" + tgt_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/anmi.png" + print(check_sharper(src_path, tgt_path)) + + src_path = "../../../cache/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f/compressed.jpeg" + rule = { + "max_size": 500000 + } + print(check_image_file_size(src_path, rule)) \ No newline at end of file diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py index ecf4e10..2331a75 100644 --- a/desktop_env/evaluators/metrics/vscode.py +++ b/desktop_env/evaluators/metrics/vscode.py @@ -161,3 +161,54 @@ def check_python_file_by_test_suite(actual_files, test_file, **options) -> float def check_python_file_by_gold_file(actual_files, gold_file: str, **options) -> float: pass + + +def check_html_background_image(src_path: str, rule: Dict = None) -> float: + """ + Check if the background image is correctly set. 
+ multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108 + """ + from bs4 import BeautifulSoup + with open(src_path, 'r') as f: + html_content = f.read() + soup = BeautifulSoup(html_content, 'html.parser') + styles = soup.find_all('style') + for style in styles: + if f'background-image: url(\'{rule["value"]}\')' in style.text: + return 1.0 + return 0.0 + + +def compare_result_files(src_path, tgt_path): + """ + Compare whether the content of two files are the same. + multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85 + """ + with open(src_path, 'r') as f: + src_content = f.read().strip() + with open(tgt_path, 'r') as f: + tgt_content = f.read().strip() + try: + # Compare the content as numbers + tgt_content_num = float(tgt_content) + if tgt_content in src_content: + # If the content of tgt is in src, return 1.0 since output src might be + # a superset(language description+number) of tgt + return 1.0 + src_content_num = float(src_content) + if abs(src_content_num - tgt_content_num) < 1e-4: + return 1.0 + return 0.0 + except: + if src_content == tgt_content: + return 1.0 + return 0.0 + + +if __name__ == "__main__": + src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/index.html" + rule = { + "type:": "value", + "value": "anmi_sharper.png" + } + print(check_html_background_image(src_path, rule)) diff --git a/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json b/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json index b5b1cf2..3029c0c 100644 --- a/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json +++ b/evaluation_examples/examples/gimp/d16c99dc-2a1e-46f2-b350-d97c86c85c15.json @@ -84,16 +84,34 @@ } } ], - "func": "check_image_size_and_structure_sim", - "expected":{ + "func": [ + "check_image_size", + "check_structure_sim" + ], + "expected": [ + { "type": "vm_file", "path": "/home/user/Desktop/dog_with_background.png", "dest": "dog_with_background.png" }, - "result": { - "type": "vm_file", - 
"path": "/home/user/Desktop/resized.png", - "dest": "resized.png" - } + { + "type": "vm_file", + "path": "/home/user/Desktop/dog_with_background.png", + "dest": "dog_with_background.png" + } + ], + "result": [ + { + "type": "rule", + "rules": { + "height": 512 + } + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/resized.png", + "dest": "resized.png" + } + ] } } \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f.json b/evaluation_examples/examples/multi_apps/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f.json new file mode 100644 index 0000000..309f370 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/3c8f201a-009d-4bbe-8b65-a6f8b35bb57f.json @@ -0,0 +1,37 @@ +{ + "id": "3c8f201a-009d-4bbe-8b65-a6f8b35bb57f", + "snapshot": "gimp", + "instruction": "Use `gdown` to download the image from \"https://drive.google.com/uc?export=download&id=1i8j5dGS57sA07jEuPNAlQW-sn5uqUnuK\", and then use GIMP to compress it to under 600KB. 
Resize if needed.", + "source": "", + "config": [ + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"alt\", \"t\"); time.sleep(0.5);" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "gimp", + "os" + ], + "evaluator": { + "func": "check_image_file_size", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/compressed.jpeg", + "dest": "compressed.jpeg" + }, + "expected": { + "type": "rule", + "rules": { + "max_size": 600000 + } + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/42f4d1c7-4521-4161-b646-0a8934e36081.json b/evaluation_examples/examples/multi_apps/42f4d1c7-4521-4161-b646-0a8934e36081.json new file mode 100644 index 0000000..a6adb54 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/42f4d1c7-4521-4161-b646-0a8934e36081.json @@ -0,0 +1,71 @@ +{ + "id": "42f4d1c7-4521-4161-b646-0a8934e36081", + "snapshot": "gimp", + "instruction": "Configure VS Code to edit GIMP script-fu scripts effectively by installing lisp extension. 
Test by writing code that resizes the image to 128 * 128 and saves it as \"resized.png\"", + "source": "", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1yrWU5HimYPNUjdtvw1a218kh50fPVtZ3", + "path": "/home/user/Desktop/character.png" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "code" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "gimp", + "vs_code" + ], + "evaluator": { + "func": [ + "is_extension_installed", + "check_image_size" + ], + "result": [ + { + "type": "vm_command_line", + "command": [ + "code", + "--list-extensions", + "|", + "grep", + "mattn.lisp" + ] + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/resized.png", + "dest": "resized.png" + } + ], + "expected": [ + { + "type": "rule", + "rules": { + "type": "contain", + "expected": "mattn.lisp" + } + }, + { + "type": "rule", + "rules": { + "height": 128, + "width": 128 + } + } + ] + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/7f35355e-02a6-45b5-b140-f0be698bcf85.json b/evaluation_examples/examples/multi_apps/7f35355e-02a6-45b5-b140-f0be698bcf85.json new file mode 100644 index 0000000..c33b042 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/7f35355e-02a6-45b5-b140-f0be698bcf85.json @@ -0,0 +1,43 @@ +{ + "id": "7f35355e-02a6-45b5-b140-f0be698bcf85", + "snapshot": "libreoffice_calc", + "instruction": "Export the table to a CSV file and then help me write code to find the median price (fill empty values with the average). 
Save the result in \"result.txt\".", + "source": "", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://docs.google.com/spreadsheets/d/13YL-KC__pav2qp3sFDs1BT2wZnpWGp7s/export?format=xlsx", + "path": "/home/user/Desktop/stock.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Desktop/stock.xlsx" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice_calc", + "vs_code" + ], + "evaluator": { + "func": "compare_result_files", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/result.txt", + "dest": "result.txt" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?export=download&id=1oPPW_dozWGII5MRmdXdKKoEK5iBkd_8Q", + "dest": "result_gold.txt" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/91190194-f406-4cd6-b3f9-c43fac942b22.json b/evaluation_examples/examples/multi_apps/91190194-f406-4cd6-b3f9-c43fac942b22.json new file mode 100644 index 0000000..ba554e0 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/91190194-f406-4cd6-b3f9-c43fac942b22.json @@ -0,0 +1,51 @@ +{ + "id": "91190194-f406-4cd6-b3f9-c43fac942b22", + "snapshot": "gimp", + "instruction": "Launch GIMP from the command line to edit \"cola.png\" and crop the top 20% off the image for my avatar as \"cropped.png\".", + "source": "", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1bmSRNNh4JkF6izrKrmynUHarf0pFES50", + "path": "/home/user/Desktop/cola.png" + }, + { + "url": "https://drive.google.com/uc?export=download&id=1MayrIPJWRK7cMEVe3TxYmgkAbVMrYcQA", + "path": "/home/user/Desktop/cropped_gold.png" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"alt\", \"t\"); time.sleep(0.5);" + ] + } + } + ], + "trajectory": 
"trajectories/", + "related_apps": [ + "gimp", + "os" + ], + "evaluator": { + "func": "check_structure_sim", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/cropped.png", + "dest": "cropped.png" + }, + "expected": { + "type": "vm_file", + "path": "/home/user/Desktop/cropped_gold.png", + "dest": "cropped_gold.png" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/98e8e339-5f91-4ed2-b2b2-12647cb134f4.json b/evaluation_examples/examples/multi_apps/98e8e339-5f91-4ed2-b2b2-12647cb134f4.json new file mode 100644 index 0000000..3d08199 --- /dev/null +++ b/evaluation_examples/examples/multi_apps/98e8e339-5f91-4ed2-b2b2-12647cb134f4.json @@ -0,0 +1,54 @@ +{ + "id": "98e8e339-5f91-4ed2-b2b2-12647cb134f4", + "snapshot": "vs_code", + "instruction": "Merge the contents of all .txt files from your vscode project into a single document in Writer. No merging separator is needed. Ensure to set the overall font size of the document to 10.", + "source": "", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1dqbi6j3zPOqFTMFHVVaEOXSdHz5zIey1", + "path": "/home/user/Desktop/doc_proc.zip" + } + ] + } + }, + { + "type": "command", + "parameters": { + "command": "mkdir -p /home/user/Desktop/doc_proc/; unzip /home/user/Desktop/doc_proc.zip -d /home/user/Desktop/doc_proc/", + "shell": true + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "code", + "-g", + "/home/user/Desktop/doc_proc/" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "vs_code", + "libreoffice_writer" + ], + "evaluator": { + "func": "compare_docx_files", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/concat.docx", + "dest": "concat.docx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.google.com/uc?export=download&id=10k_8T7Hk9KQ2I-AbNK56hvXR6yv0Pemc", + "dest": "concat_gold.docx" + } + } +} \ No newline at 
end of file diff --git a/evaluation_examples/examples/multi_apps/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108.json b/evaluation_examples/examples/multi_apps/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108.json new file mode 100644 index 0000000..7b1529e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108.json @@ -0,0 +1,69 @@ +{ + "id": "bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108", + "snapshot": "gimp", + "instruction": "I have a background image named 'anmi.png'. I need the image to be enhanced for better sharpness, as 'anmi_sharper.png'. Next, please use it as the background image for my 'index.html' page.", + "source": "", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1BAZvR0BflPh70ClL2bHrbJ-xyV-nNQ1o", + "path": "/home/user/Desktop/anmi.png" + }, + { + "url": "https://drive.google.com/uc?export=download&id=1zcyY7y_deKStc-AtU8pFFrJItOuyfrNK", + "path": "/home/user/Desktop/index.html" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "gimp", + "/home/user/Desktop/anmi.png" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "gimp", + "vs_code" + ], + "evaluator": { + "func": [ + "check_sharper", + "check_html_background_image" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/anmi_sharper.png", + "dest": "anmi_sharper.png" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/index.html", + "dest": "index.html" + } + ], + "expected": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/anmi.png", + "dest": "anmi.png" + }, + { + "type": "rule", + "rules": { + "type:": "value", + "value": "anmi_sharper.png" + } + } + ] + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/d68204bf-11c1-4b13-b48b-d303c73d4bf6.json b/evaluation_examples/examples/multi_apps/d68204bf-11c1-4b13-b48b-d303c73d4bf6.json new file mode 100644 index 0000000..55af9af --- 
/dev/null +++ b/evaluation_examples/examples/multi_apps/d68204bf-11c1-4b13-b48b-d303c73d4bf6.json @@ -0,0 +1,51 @@ +{ + "id": "d68204bf-11c1-4b13-b48b-d303c73d4bf6", + "snapshot": "gimp", + "instruction": "Divide my image vertically into three equal sections with command line. Then rearrange them in order with a gradient of warm tones, progressively becoming warmer from left to right as a new image \"rearranged.png\".", + "source": "", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1CPGW_OZsfSWDdTU7CFrTjpzSAASyLy4w", + "path": "/home/user/Desktop/tilearray.png" + }, + { + "url": "https://drive.google.com/uc?export=download&id=1aHwmnxL2CKEh_FhVpevY452-BQH2t5rG", + "path": "/home/user/Desktop/rearranged_gold.png" + } + ] + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"alt\", \"t\"); time.sleep(0.5);" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "gimp", + "os" + ], + "evaluator": { + "func": "check_structure_sim", + "result": { + "type": "vm_file", + "path": "/home/user/Desktop/rearranged.png", + "dest": "rearranged.png" + }, + "expected": { + "type": "vm_file", + "path": "/home/user/Desktop/rearranged_gold.png", + "dest": "rearranged_gold.png" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f.json b/evaluation_examples/examples/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f.json new file mode 100644 index 0000000..b93696e --- /dev/null +++ b/evaluation_examples/examples/multi_apps/e8172110-ec08-421b-a6f5-842e6451911f.json @@ -0,0 +1,67 @@ +{ + "id": "e8172110-ec08-421b-a6f5-842e6451911f", + "snapshot": "gimp", + "instruction": "Open 'character.png' in GIMP and extract the pixel art character. Save the selected character as 'character_gimp.png'. 
Additionally, write a Python script to automate this selection process, ensuring it precisely mimics the manual extraction done in GIMP. Output the result from the script as 'character_code.png'.", + "source": "", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1Tf7MvqBMHuIhvKmk-Ryr_qmzmkACFjxB", + "path": "/home/user/Desktop/character.png" + }, + { + "url": "https://drive.google.com/uc?export=download&id=1wKG5X6LaN0tShsAK4lFg3OyFg8OGYYZg", + "path": "/home/user/Desktop/character_no_background_gold.png" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": [ + "gimp", + "/home/user/Desktop/character.png" + ] + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "gimp", + "vs_code" + ], + "evaluator": { + "func": [ + "check_structure_sim", + "check_structure_sim" + ], + "result": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/character_gimp.png", + "dest": "character_gimp.png" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/character_code.png", + "dest": "character_code.png" + } + ], + "expected": [ + { + "type": "vm_file", + "path": "/home/user/Desktop/character_no_background_gold.png", + "dest": "character_no_background_gold.png" + }, + { + "type": "vm_file", + "path": "/home/user/Desktop/character_no_background_gold.png", + "dest": "character_no_background_gold.png" + } + ] + } +} \ No newline at end of file