From be5d55a3f8144a0e7eb224cefdf30157708852b7 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 1 Feb 2024 14:22:34 +0800 Subject: [PATCH 1/8] ver Feb1stv2 failed to start up experiments of multi_apps --- branch_flag | 2 +- experiment_screenshot_som.py | 27 ++++++++++++++++++++++++--- mm_agents/gpt_4v_agent.py | 3 ++- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/branch_flag b/branch_flag index 9daeafb..760637d 100644 --- a/branch_flag +++ b/branch_flag @@ -1 +1 @@ -test +exp_som diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py index 5cddff7..0fbe534 100644 --- a/experiment_screenshot_som.py +++ b/experiment_screenshot_som.py @@ -133,7 +133,7 @@ def main(example_class, example_id): example = json.load(f) #example["snapshot"] = "exp_v1" # example["snapshot"] = "exp_setup4" - example["snapshot"] = "Snapshot 30" + example["snapshot"] = "Snapshot 34" logger.info("TASK: %s/%s", example_class, example_id) @@ -214,6 +214,27 @@ if __name__ == '__main__': , "94760984-3ff5-41ee-8347-cf1af709fea0" , "99146c54-4f37-4ab8-9327-5f3291665e1e" , "c9e7eaf2-b1a1-4efc-a982-721972fa9f02" + # 57, ^ thunderbird, v multi_apps + , "f8cfa149-d1c1-4215-8dac-4a0932bad3c2" + , "897e3b53-5d4d-444b-85cb-2cdc8a97d903" + , "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc" + , "b52b40a5-ad70-4c53-b5b0-5650a8387052" + , "46407397-a7d5-4c6b-92c6-dbe038b1457b" + , "2b9493d7-49b8-493a-a71b-56cd1f4d6908" + , "51f5801c-18b3-4f25-b0c3-02f85507a078" + , "2c9fc0de-3ee7-45e1-a5df-c86206ad78b5" + , "510f64c8-9bcc-4be1-8d30-638705850618" + , "937087b6-f668-4ba6-9110-60682ee33441" + , "ee9a3c83-f437-4879-8918-be5efbb9fac7" + , "3680a5ee-6870-426a-a997-eba929a0d25c" + , "d9b7c649-c975-4f53-88f5-940b29c47247" + , "f7dfbef3-7697-431c-883a-db8583a4e4f9" + , "a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb" + , "78aed49a-a710-4321-a793-b611a7c5b56b" + , "c867c42d-a52d-4a24-8ae3-f75d256b5618" + , "e135df7c-7687-4ac0-a5f0-76b74438b53e" + , "58565672-7bfe-48ab-b828-db349231de6b" + , 
"2fe4b718-3bd7-46ec-bdce-b184f5653624" ] - for example_id in xx_list[42:]: - main("thunderbird", example_id) + for example_id in xx_list[57:]: + main("multi_apps", example_id) diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py index 9810eff..7278c98 100644 --- a/mm_agents/gpt_4v_agent.py +++ b/mm_agents/gpt_4v_agent.py @@ -466,7 +466,8 @@ class GPT4v_Agent: "messages": messages, "max_tokens": self.max_tokens }) - except: + except Exception as e: + logger.warning("LLM INVOCATION ERROR: %s", str(e)) response = "" logger.debug("RESPONSE: %s", response) From 9df0854469630ee857048ca468c8104e3fe56d11 Mon Sep 17 00:00:00 2001 From: David Chang Date: Thu, 1 Feb 2024 22:56:09 +0800 Subject: [PATCH 2/8] ver Feb1stv3 rerun SoM experiment on thunderbird --- experiment_screenshot_som.py | 4 ++-- requirements.txt | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py index 0fbe534..304293e 100644 --- a/experiment_screenshot_som.py +++ b/experiment_screenshot_som.py @@ -236,5 +236,5 @@ if __name__ == '__main__': , "58565672-7bfe-48ab-b828-db349231de6b" , "2fe4b718-3bd7-46ec-bdce-b184f5653624" ] - for example_id in xx_list[57:]: - main("multi_apps", example_id) + for example_id in xx_list[42:43]: + main("thunderbird", example_id) diff --git a/requirements.txt b/requirements.txt index ab1dcf1..bce1ae0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,4 +39,6 @@ fastdtw odfpy openai func-timeout -beautifulsoup4 \ No newline at end of file +beautifulsoup4 +dashscope +google-generativeai From 538b9928fefd0d0151a86fe658e688591fa96406 Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Fri, 2 Feb 2024 02:23:25 +0800 Subject: [PATCH 3/8] fix some problems in libreoffice writer --- desktop_env/evaluators/metrics/__init__.py | 1 + desktop_env/evaluators/metrics/docs.py | 54 +++++++++++++++++-- .../0810415c-bde4-4443-9047-d5f70165a697.json | 2 +- .../0a0faba3-5580-44df-965d-f562a99b291c.json | 
12 +++-- .../8472fece-c7dd-4241-8d65-9b3cd1a0b568.json | 2 +- .../adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json | 2 +- 6 files changed, 61 insertions(+), 12 deletions(-) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 3f67ebf..ec562a0 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -43,6 +43,7 @@ from .docs import ( compare_highlighted_text, is_first_line_centered, check_file_exists, + check_tabstops, compare_contains_image ) from .general import ( diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 466483b..def8bf0 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -6,11 +6,13 @@ import zipfile from typing import List, Dict, Any from docx import Document -from docx.enum.text import WD_PARAGRAPH_ALIGNMENT +from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT from docx.shared import RGBColor from odf.opendocument import load from odf.text import P from odf.text import Span +from skimage.color import deltaE_ciede2000 +from skimage.color import rgb2lab logger = logging.getLogger("desktopenv.metric.docs") @@ -141,7 +143,7 @@ def compare_docx_tables(docx_file1, docx_file2): # Compare each cell for i in range(len(table1.rows)): for j in range(len(table1.columns)): - if table1.cell(i, j).text != table2.cell(i, j).text: + if table1.cell(i, j).text.strip() != table2.cell(i, j).text.strip(): return 0 return 1 @@ -234,6 +236,40 @@ def check_file_exists(directory, filename): return 1 if os.path.isfile(file_path) else 0 +def check_tabstops(docx_file1, docx_file2, **kwargs) -> float: + doc1: Document = Document(docx_file1) + doc2: Document = Document(docx_file2) + para1 = [p for p in doc1.paragraphs if p.text.strip()] + para2 = [p for p in doc2.paragraphs if p.text.strip()] + if len(para1) != len(para2): return .0 + + if 
kwargs.get('word_number_split_by_tabstop', None) is not None: + number = kwargs['word_number_split_by_tabstop'] + index = kwargs.get('index', 0) + for p1 in para1: + splits = p1.text.split('\t') + if len(splits) == 0: return .0 + words = list(filter(lambda x: x.strip(), re.split(r'\s', splits[index]))) + if len(words) != number: return .0 + + section = doc2.sections[0] + paragraph_width = section.page_width - section.left_margin - section.right_margin + ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0) + minus = .0 + for p1, p2 in zip(para1, para2): + # filter CLEAR tabstop and default left-0 tabstop + tabs1 = [tst for tst in p1.paragraph_format.tab_stops if not ignore_tabs(tst)] + tabs2 = [tst for tst in p2.paragraph_format.tab_stops if not ignore_tabs(tst)] + if len(tabs1) != len(tabs2): return .0 + difference = .0 + for t1, t2 in zip(tabs1, tabs2): + if t1.alignment != t2.alignment: return .0 + difference += abs(t1.position - t2.position) + minus += difference / paragraph_width + score = 1 - (minus / len(para1)) + return score + + def compare_contains_image(docx_file1, docx_file2): doc1 = Document(docx_file1) doc2 = Document(docx_file2) @@ -258,10 +294,18 @@ def compare_contains_image(docx_file1, docx_file2): # print(find_default_font("Ani", config_path)) -def evaluate_colored_words_in_tables(file_path1, file_path2): +def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs): if not compare_docx_files(file_path1, file_path2): return 0 document = Document(file_path1) + threshold = kwargs.get('threshold', 3.5) + + def _calculate_color_difference(rgb1, rgb2): + srgb1 = [rgb1[0] / 255.0, rgb1[1] / 255.0, rgb1[2] / 255.0] + srgb2 = [rgb2[0] / 255.0, rgb2[1] / 255.0, rgb2[2] / 255.0] + lab1, lab2 = rgb2lab(srgb1), rgb2lab(srgb2) + delta_e = deltaE_ciede2000(lab1, lab2) + return delta_e for table in document.tables: # Iterate through rows and cells in the table @@ -273,9 +317,9 @@ 
def evaluate_colored_words_in_tables(file_path1, file_path2): if word: first_letter = word[0].lower() - if first_letter in 'aeiou' and run.font.color.rgb != RGBColor(255, 0, 0): + if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(255, 0, 0)) > threshold: return 0 # Vowel-colored words should be red - elif first_letter not in 'aeiou' and run.font.color.rgb != RGBColor(0, 0, 255): + elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(0, 0, 255)) > threshold: return 0 # Non-vowel-colored words should be blue return 1 # All words in tables are correctly colored diff --git a/evaluation_examples/examples/libreoffice_writer/0810415c-bde4-4443-9047-d5f70165a697.json b/evaluation_examples/examples/libreoffice_writer/0810415c-bde4-4443-9047-d5f70165a697.json index 619707d..0471ab3 100644 --- a/evaluation_examples/examples/libreoffice_writer/0810415c-bde4-4443-9047-d5f70165a697.json +++ b/evaluation_examples/examples/libreoffice_writer/0810415c-bde4-4443-9047-d5f70165a697.json @@ -55,7 +55,7 @@ "func": "compare_line_spacing", "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1-svVsH-l2ofufEKuN-cYrIrvXNobtATE&export=download&authuser=0&confirm=t&uuid=be7f891a-f858-48f5-a72d-4e42bbfb8b65&at=APZUnTXzBnaeSJjmxeh4zG03pzA0:1704179807785", + "path": "https://drive.usercontent.google.com/download?id=1-svVsH-l2ofufEKuN-cYrIrvXNobtATE&export=download&authuser=0&confirm=t&uuid=95ca5e2e-7fb3-4084-9f7b-a608a8277322&at=APZUnTXFO_571vyDp_r_LskPfq-j:1706796981024", "dest": "Novels_Intro_Packet_Gold.docx" }, "result": { diff --git a/evaluation_examples/examples/libreoffice_writer/0a0faba3-5580-44df-965d-f562a99b291c.json b/evaluation_examples/examples/libreoffice_writer/0a0faba3-5580-44df-965d-f562a99b291c.json index 5c2e737..b148787 100644 --- a/evaluation_examples/examples/libreoffice_writer/0a0faba3-5580-44df-965d-f562a99b291c.json +++ 
b/evaluation_examples/examples/libreoffice_writer/0a0faba3-5580-44df-965d-f562a99b291c.json @@ -1,7 +1,7 @@ { "id": "0a0faba3-5580-44df-965d-f562a99b291c", "snapshot": "libreoffice_writer", - "instruction": "I would like to make the first three words of the sentence left-aligned and the rest right-aligned. I basically want to have some empty space in the middle to add some photos. Assume that every sentence will have at least three words. Could you help me on alignment for me?", + "instruction": "I would like to make the first three words of the sentence left-aligned and the rest right-aligned. I basically want to have some empty space in the middle to add some photos. Assume that every sentence will have at least three words. Could you help me on alignment for me using tabstops?", "source": "https://stackoverflow.com/questions/64528055/how-to-make-part-of-my-sentence-left-aligned-and-rest-as-right-aligned", "config": [ { @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive.google.com/uc?id=1Wrjxsf184Go70TcRGM4Tohczh29Q9B_U&export=download", + "url": "https://drive.usercontent.google.com/download?id=1Wrjxsf184Go70TcRGM4Tohczh29Q9B_U&export=download&authuser=0&confirm=t&uuid=811f572f-03ee-47b9-8fd5-4978920ff425&at=APZUnTXcRTZAOb33QlpZ7-FT8I8Q:1706799959703", "path": "Desktop/04 CHIN9505 EBook Purchasing info 2021 Jan.docx" } ] @@ -52,16 +52,20 @@ } } ], - "func": "compare_init_lines", + "func": "check_tabstops", "expected": { "type": "cloud_file", - "path": "https://drive.google.com/uc?id=1yyHGj8KUHDMsZmc1QeJ1KkvSEGy83jMR&export=download", + "path": "https://drive.usercontent.google.com/download?id=1yyHGj8KUHDMsZmc1QeJ1KkvSEGy83jMR&export=download&authuser=0&confirm=t&uuid=32f8aa47-c590-4ece-bf65-65a0d683fcfa&at=APZUnTU1_BaeVgyB8GLJWfJrIAYh:1706802911129", "dest": "04 CHIN9505 EBook Purchasing info 2021 Jan_Gold.docx" }, "result": { "type": "vm_file", "path": "Desktop/04 CHIN9505 EBook Purchasing info 2021 Jan.docx", "dest": "04 CHIN9505 EBook 
Purchasing info 2021 Jan.docx" + }, + "options": { + "word_number_split_by_tabstop": 3, + "index": 0 } } } \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_writer/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json b/evaluation_examples/examples/libreoffice_writer/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json index 4a7c8f4..4c6a790 100644 --- a/evaluation_examples/examples/libreoffice_writer/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json +++ b/evaluation_examples/examples/libreoffice_writer/8472fece-c7dd-4241-8d65-9b3cd1a0b568.json @@ -55,7 +55,7 @@ "func": "evaluate_colored_words_in_tables", "expected": { "type": "cloud_file", - "path": "https://drive.google.com/uc?id=1ksn444K17lFOdm5pELrQYvuZHkOsKq69&export=download", + "path": "https://drive.usercontent.google.com/download?id=1XmF-6ttL23xMK-j4P50qVGO4vgb6EgZR&export=download&authuser=0&confirm=t&uuid=fe5c16a5-3131-4a19-a6bf-c5e7faf341dd&at=APZUnTWDtqYGJvChovcgUVHDnvzy:1706807220392", "dest": "Dolch_Sight_Words_Primer_Gold.docx" }, "result": { diff --git a/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json b/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json index ef0bb84..b264f08 100644 --- a/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json +++ b/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json @@ -1,7 +1,7 @@ { "id": "adf5e2c3-64c7-4644-b7b6-d2f0167927e7", "snapshot": "libreoffice_writer", - "instruction": "Help me adding \"Steinberg, F. M., Bearden, M. M., & Keen, C. L. (2003). Cocoa and chocolate flavonoids: Implications for cardiovascular health. Journal of the American Dietetic Association, 103(2), 215-223. doi: 10.1053/jada.2003.50028\" to my reference list, and add a cross reference in the fourth paragraph where I marked \"\".", + "instruction": "Help me adding \"Steinberg, F. M., Bearden, M. M., & Keen, C. L. (2003). 
Cocoa and chocolate flavonoids: Implications for cardiovascular health. Journal of the American Dietetic Association, 103(2), 215-223. doi: 10.1053/jada.2003.50028\" to my reference list, and add a cross reference (using reference number) in the fourth paragraph where I marked \"\".", "source": "https://seekstar.github.io/2022/04/11/libreoffice%E5%BC%95%E7%94%A8%E6%96%87%E7%8C%AE/", "config": [ { From e96412fc9a22d5733be935f006b6ded2dbd7c2a3 Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Fri, 2 Feb 2024 03:29:25 +0800 Subject: [PATCH 4/8] update windows exp libreoffice calc batch 1 --- .../01b269ae-2111-4a07-81fd-3fcd711993b0.json | 82 +++++++++++ .../0bf05a7d-b28b-44d2-955a-50b41e24012a.json | 109 ++++++++++++++ .../4e6fcf72-daf3-439f-a232-c434ce416af6.json | 134 ++++++++++++++++++ .../6054afcb-5bab-4702-90a0-b259b5d3217c.json | 90 ++++++++++++ .../7a4e4bc8-922c-4c84-865c-25ba34136be1.json | 82 +++++++++++ .../7efeb4b1-3d19-4762-b163-63328d66303b.json | 82 +++++++++++ .../8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json | 90 ++++++++++++ .../a9f325aa-8c05-4e4f-8341-9e4358565f4f.json | 82 +++++++++++ .../abed40dc-063f-4598-8ba5-9fe749c0615d.json | 82 +++++++++++ .../eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json | 82 +++++++++++ .../ecb0df7a-4e8d-4a03-b162-053391d3afaf.json | 109 ++++++++++++++ 11 files changed, 1024 insertions(+) create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/4e6fcf72-daf3-439f-a232-c434ce416af6.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/6054afcb-5bab-4702-90a0-b259b5d3217c.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/7a4e4bc8-922c-4c84-865c-25ba34136be1.json create mode 100644 
evaluation_examples/examples/Windows/libreoffice_calc/7efeb4b1-3d19-4762-b163-63328d66303b.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/abed40dc-063f-4598-8ba5-9fe749c0615d.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json create mode 100644 evaluation_examples/examples/Windows/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json b/evaluation_examples/examples/Windows/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json new file mode 100644 index 0000000..2e3a28e --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json @@ -0,0 +1,82 @@ +{ + "id": "01b269ae-2111-4a07-81fd-3fcd711993b0", + "snapshot": "libreoffice_calc", + "instruction": "Fill all the blank cells with the value in the cell above it", + "source": "https://www.youtube.com/shorts/VrUzPTIwQ04", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1FuOZ-5YoKgLLwl_oZd4R3D8pZACf_ukS&export=download&authuser=0&confirm=t&uuid=2051e7a6-5930-4cef-8d77-20ebf66ec6e6&at=APZUnTX1fXqlxy6rluq-Kw-LUhS5:1705919461032", + "path": "C:\\Users\\user\\Student_Level_Fill_Blank.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Student_Level_Fill_Blank.xlsx" + } + } + ], + "trajectory": "trajectories/01b269ae-2111-4a07-81fd-3fcd711993b0", + "related_apps": [ + "msoffice_excel" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": 
"Student_Level_Fill_Blank.xlsx - Excel", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1HTle3vgdZSjJIK_wjXyjtWwbiYJeguwv&export=download&authuser=0&confirm=t&uuid=c5d0868b-bed2-48fb-949b-8a9f3f61e8cf&at=APZUnTVqS9CTZFJ1rPqCGQPDCv3p:1705919542916", + "dest": "Student_Level_Fill_Blank_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\user\\Student_Level_Fill_Blank.xlsx", + "dest": "Student_Level_Fill_Blank.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json b/evaluation_examples/examples/Windows/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json new file mode 100644 index 0000000..3609427 --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json @@ -0,0 +1,109 @@ +{ + "id": "0bf05a7d-b28b-44d2-955a-50b41e24012a", + "snapshot": "libreoffice_calc", + "instruction": "I would like to pad all the numbers in the 'Old ID' column with zeros in front, to fill them up to seven digits in the 'New 7 Digit ID' column.", + "source": "https://www.youtube.com/shorts/FPAQaDTS8VY", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": 
"https://drive.usercontent.google.com/download?id=1DqGy5JRKOuZMRJ8O76d4Cds4WaRyz8V1&export=download&authuser=0&confirm=t&uuid=fa0694d1-2a77-4fd2-89d3-d9b854317823&at=APZUnTU9BxqG7E8tLZ104c0E8BEL:1705501029016", + "path": "C:\\Users\\user\\Customers_New_7digit_Id.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Customers_New_7digit_Id.xlsx" + } + } + ], + "trajectory": "trajectories/0bf05a7d-b28b-44d2-955a-50b41e24012a", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Customers_New_7digit_Id.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "libreoffice", + "--convert-to", + "csv:Text - txt - csv (StarCalc):44,34,UTF-8,,,,false,true,true,false,false,1", + "--outdir", + "/home/user", + "C:\\Users\\user\\Customers_New_7digit_Id.xlsx" + ] + } + } + ], + "func": "compare_table", + "result": { + "type": "vm_file", + "path": [ + "C:\\Users\\user\\Customers_New_7digit_Id.xlsx", + "C:\\Users\\user\\Customers_New_7digit_Id-Sheet1.csv" + ], + "dest": [ + "Customers_New_7digit_Id.xlsx", + "Customers_New_7digit_Id-Sheet1.csv" + ], + "multi": true + }, + "expected": { + "type": "cloud_file", + "path": [ + "https://drive.usercontent.google.com/download?id=1zXz5k5A403IR0GE6DRRXRgQZrSIoVFSz&export=download&authuser=0&confirm=t&uuid=ba70b841-969c-4d91-9288-0011aeecf251&at=APZUnTWx3LL9udCgJAh-VMFzzfod:1705501007861", + 
"https://drive.usercontent.google.com/download?id=1h1GnUpyj92K7FXiHJ1xVaUYW_UYMDLPM&export=download&authuser=0&confirm=t&uuid=98de75bd-ba32-4ceb-97a7-b8d303a8dc96&at=APZUnTUewdfFIsyC3UlSlMcmmRbo:1705500978790" + ], + "dest": [ + "Customers_New_7digit_Id_gold.xlsx", + "Customers_New_7digit_Id_gold-Sheet1.csv" + ], + "multi": true + }, + "options": { + "rules": [ + { + "type": "sheet_print", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/4e6fcf72-daf3-439f-a232-c434ce416af6.json b/evaluation_examples/examples/Windows/libreoffice_calc/4e6fcf72-daf3-439f-a232-c434ce416af6.json new file mode 100644 index 0000000..ea65248 --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/4e6fcf72-daf3-439f-a232-c434ce416af6.json @@ -0,0 +1,134 @@ +{ + "id": "4e6fcf72-daf3-439f-a232-c434ce416af6", + "snapshot": "libreoffice_calc", + "instruction": "Please calculate the ages of the employees according to their birthday.", + "source": "https://www.youtube.com/shorts/0uxJccNCKcE", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1WIyJbssCCscQ96be2hF9N7tXPz23JoBT&export=download&authuser=0&confirm=t&uuid=503cdbf3-2fe3-4019-bfd1-5d1faab8d049&at=APZUnTV-XLlF8KEx7zMjtX2kYSuM:1705909207212", + "path": "C:\\Users\\user\\Employee_Age_By_Birthday.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Employee_Age_By_Birthday.xlsx" + } + } + ], + "trajectory": "trajectories/4e6fcf72-daf3-439f-a232-c434ce416af6", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Employee_Age_By_Birthday.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + 
"command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1sRI72UGfHiVBRFuV4uwhr173u3Sf46Y6&export=download&authuser=0&confirm=t&uuid=90da5e2b-39c0-449d-b753-09dfed73b509&at=APZUnTVFInccKo2QB9JNnIidFfG3:1705909465173", + "path": "C:\\Users\\user\\Employee_Age_By_Birthday_gold.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Employee_Age_By_Birthday_gold.xlsx" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Employee_Age_By_Birthday_gold.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "vm_file", + "path": "C:\\Users\\user\\Employee_Age_By_Birthday_gold.xlsx", + "dest": "Employee_Age_By_Birthday_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\user\\Employee_Age_By_Birthday.xlsx", + "dest": "Employee_Age_By_Birthday.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/6054afcb-5bab-4702-90a0-b259b5d3217c.json b/evaluation_examples/examples/Windows/libreoffice_calc/6054afcb-5bab-4702-90a0-b259b5d3217c.json new file mode 100644 index 0000000..072202b --- /dev/null +++ 
b/evaluation_examples/examples/Windows/libreoffice_calc/6054afcb-5bab-4702-90a0-b259b5d3217c.json @@ -0,0 +1,90 @@ +{ + "id": "6054afcb-5bab-4702-90a0-b259b5d3217c", + "snapshot": "libreoffice_calc", + "instruction": "Some data are missed by now and are filled by 'N/A' temporarily. Please hide them in the table for now. Do not delete them and filter is no needed.", + "source": "https://www.youtube.com/shorts/JTbZ8sRxkdU", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1e1Ccsr_CQau9-boF92GxzZ0RtEHPtfdX&export=download&authuser=0&confirm=t&uuid=a1d4518d-e085-4bfa-ae6f-2514ed48efba&at=APZUnTU_ng4YNBQO7u6Dsuj21Gmq:1705911243359", + "path": "C:\\Users\\user\\Date_Budget_Variance_HideNA.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Date_Budget_Variance_HideNA.xlsx" + } + } + ], + "trajectory": "trajectories/6054afcb-5bab-4702-90a0-b259b5d3217c", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Date_Budget_Variance_HideNA.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1ReZexJAvbAAUng0JD3lEHN70J0WcS0_i&export=download&authuser=0&confirm=t&uuid=a11148b1-93e8-4634-a413-26e0e433c2c9&at=APZUnTV6KulVQf6LpHl4IVNqE5hA:1705914637572", + "dest": "Date_Budget_Variance_HideNA_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\user\\Date_Budget_Variance_HideNA.xlsx", 
+ "dest": "Date_Budget_Variance_HideNA.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + }, + { + "type": "row_props", + "sheet_idx0": 0, + "sheet_idx1": "EI0", + "props": [ + "hidden" + ] + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/7a4e4bc8-922c-4c84-865c-25ba34136be1.json b/evaluation_examples/examples/Windows/libreoffice_calc/7a4e4bc8-922c-4c84-865c-25ba34136be1.json new file mode 100644 index 0000000..ebbd81a --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/7a4e4bc8-922c-4c84-865c-25ba34136be1.json @@ -0,0 +1,82 @@ +{ + "id": "7a4e4bc8-922c-4c84-865c-25ba34136be1", + "snapshot": "libreoffice_calc", + "instruction": "Reorder the columns to be \"Date\", \"First Name\", \"Last Name\", \"Order ID\", \"Sales\"", + "source": "https://www.youtube.com/shorts/bvUhr1AHs44", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1jS159dHRdeZh54A8WgqJn_somQpKP1K_&export=download&authuser=0&confirm=t&uuid=06aa5391-5f3e-499b-9073-9ce24109f91f&at=APZUnTWFCuiN8b5ilNmxx3aPYGUF:1705551767314", + "path": "C:\\Users\\user\\Name_Order_Id_move_column.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Name_Order_Id_move_column.xlsx" + } + } + ], + "trajectory": "trajectories/7a4e4bc8-922c-4c84-865c-25ba34136be1", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Name_Order_Id_move_column.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + 
"type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1jAK4--qLRdUOA57MBsmERpR205GGIMij&export=download&authuser=0&confirm=t&uuid=4ca454c9-d9d5-4e5f-95e1-cb0e849932e5&at=APZUnTVgkUbmLJLC4O7ACKmzfFbq:1705551965635", + "dest": "Name_Order_Id_move_column_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\user\\Name_Order_Id_move_column.xlsx", + "dest": "Name_Order_Id_move_column.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/7efeb4b1-3d19-4762-b163-63328d66303b.json b/evaluation_examples/examples/Windows/libreoffice_calc/7efeb4b1-3d19-4762-b163-63328d66303b.json new file mode 100644 index 0000000..6400051 --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/7efeb4b1-3d19-4762-b163-63328d66303b.json @@ -0,0 +1,82 @@ +{ + "id": "7efeb4b1-3d19-4762-b163-63328d66303b", + "snapshot": "libreoffice_calc", + "instruction": "Fill the Sequence Numbers as \"No. 
#\" in the \"Seq No.\" column", + "source": "https://www.youtube.com/shorts/4jzXfZNhfmk", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1muvWVg44C6EtBpBQkZ6_ylg3M8r3jjOC&export=download&authuser=0&confirm=t&uuid=41354a7c-d199-4044-960e-0d146fe6f12a&at=APZUnTW3nj5RV3SBJt5tdeVxM4mM:1705553238027", + "path": "C:\\Users\\user\\Order_Sales_Serial#.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Order_Sales_Serial#.xlsx" + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Order_Sales_Serial#.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1K37qrxab2cib1SkKPthYkb83fzaj9ETa&export=download&authuser=0&confirm=t&uuid=66b763e6-2fb1-46fc-9753-7f8533366b82&at=APZUnTU1nCKKXBwCbyI8aKpdhE5W:1705560437314", + "dest": "Order_Sales_Serial#_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\user\\Order_Sales_Serial#.xlsx", + "dest": "Order_Sales_Serial#.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json b/evaluation_examples/examples/Windows/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json new file mode 
100644 index 0000000..c6b428b --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json @@ -0,0 +1,90 @@ +{ + "id": "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14", + "snapshot": "libreoffice_calc", + "instruction": "Given a partial calendar, please highlight all the weekends (Saturday & Sunday) by setting the cell background as red (#ff0000).", + "source": "https://www.youtube.com/shorts/Hbcwu6IQ1ns", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1_gyig5Vs3VOuvkjRoLt2ZpXBIyCZfUmV&export=download&authuser=0&confirm=t&uuid=ed113cdd-4279-454b-a66d-07447e31c818&at=APZUnTVztf5DcbF0DjLJitkpUUxt:1705920417565", + "path": "C:\\Users\\user\\Calendar_Highlight_Weekend_Days.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Calendar_Highlight_Weekend_Days.xlsx" + } + } + ], + "trajectory": "trajectories/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Calendar_Highlight_Weekend_Days.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1URKsHvPdWDvB-qwsIZ-SqHAmiXaosXKW&export=download&authuser=0&confirm=t&uuid=849064c9-7402-48c5-87f6-e5c290e4bd24&at=APZUnTXarmqM0cO4I0z-Lv7MElzX:1705920495794", + "dest": "Calendar_Highlight_Weekend_Days_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path":
"C:\\Users\\user\\Calendar_Highlight_Weekend_Days.xlsx", + "dest": "Calendar_Highlight_Weekend_Days.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + }, + { + "type": "style", + "sheet_idx0": 0, + "sheet_idx1": "EI0", + "props": [ + "bgcolor" + ] + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json b/evaluation_examples/examples/Windows/libreoffice_calc/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json new file mode 100644 index 0000000..d019a15 --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json @@ -0,0 +1,82 @@ +{ + "id": "a9f325aa-8c05-4e4f-8341-9e4358565f4f", + "snapshot": "libreoffice_calc", + "instruction": "Remove the redundant whitespaces and canonicalize the letter cases by capitalizing the first letter of each word and leaving the other letters in lower case.", + "source": "https://www.youtube.com/shorts/A0gmEBRKXWs", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1PGJxDM5QglZFdLd7yLQnA2MASf2so14S&export=download&authuser=0&confirm=t&uuid=725f7d90-1e30-4579-b946-7c8932aac440&at=APZUnTUpha_2nl3sAOmM1AeCCir_:1705561280883", + "path": "C:\\Users\\user\\Movie_title_garbage_clean.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Movie_title_garbage_clean.xlsx" + } + } + ], + "trajectory": "trajectories/a9f325aa-8c05-4e4f-8341-9e4358565f4f", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Movie_title_garbage_clean.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import
pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1hrmRQig53UW069MEN_V23xJXFrIooylm&export=download&authuser=0&confirm=t&uuid=82a01881-c72c-4463-b258-c8c66f8d72af&at=APZUnTVZVuQj91twmMdH1plMcABA:1705561377705", + "dest": "Movie_title_garbage_clean_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\user\\Movie_title_garbage_clean.xlsx", + "dest": "Movie_title_garbage_clean.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/abed40dc-063f-4598-8ba5-9fe749c0615d.json b/evaluation_examples/examples/Windows/libreoffice_calc/abed40dc-063f-4598-8ba5-9fe749c0615d.json new file mode 100644 index 0000000..befc9f5 --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/abed40dc-063f-4598-8ba5-9fe749c0615d.json @@ -0,0 +1,82 @@ +{ + "id": "abed40dc-063f-4598-8ba5-9fe749c0615d", + "snapshot": "libreoffice_calc", + "instruction": "Check the names in column \"Names with duplicates\" and put the unique ones in column \"Unique Names\". 
Keep the original order.", + "source": "https://help.libreoffice.org/7.6/ro/text/scalc/guide/remove_duplicates.html?&DbPAR=SHARED&System=UNIX", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1A3O37a2M_tkmXHUn6G8kYu73cUMRUZnt&export=download&authuser=0&confirm=t&uuid=9a44147f-15e4-426c-9235-74fdda7439dc&at=APZUnTU4MAD7rODyryb9r0YolrrN:1705918712764", + "path": "C:\\Users\\user\\Names_Duplicate_Unique.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Names_Duplicate_Unique.xlsx" + } + } + ], + "trajectory": "trajectories/abed40dc-063f-4598-8ba5-9fe749c0615d", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Names_Duplicate_Unique.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1GYG97VdmPG9mlhSBjMlMpjsuDsEDWXNB&export=download&authuser=0&confirm=t&uuid=6dd49f77-6a87-4f99-9027-0c74bad23d6d&at=APZUnTWzHV6JFiTPuo2ICUSEZqq8:1705918802025", + "dest": "Names_Duplicate_Unique_gold.xlsx" + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\user\\Names_Duplicate_Unique.xlsx", + "dest": "Names_Duplicate_Unique.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json 
b/evaluation_examples/examples/Windows/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json new file mode 100644 index 0000000..42e2871 --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json @@ -0,0 +1,82 @@ +{ + "id": "eb03d19a-b88d-4de4-8a64-ca0ac66f426b", + "snapshot": "libreoffice_calc", + "instruction": "Transpose the table and paste it starting from B8", + "source": "https://www.youtube.com/shorts/t9JLUaT55UQ", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1Zl6ZUkbiV9sISjJRfracd8-xKrW2G3yv&export=download&authuser=0&confirm=t&uuid=6799cca6-62d2-4cfa-b28a-b8da486b5d01&at=APZUnTVy7-CQMjvuFyu7ZMxztfT6:1705563660974", + "path": "C:\\Users\\user\\Students_Class_Subject_Marks.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Students_Class_Subject_Marks.xlsx" + } + } + ], + "trajectory": "trajectories/eb03d19a-b88d-4de4-8a64-ca0ac66f426b", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Students_Class_Subject_Marks.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + } + ], + "func": "compare_table", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1GwqF52n6cvmQ-ivhmfNCep9mZboQ706-&export=download&authuser=0&confirm=t&uuid=4c750a20-73b8-4072-a4bf-3a4aa77b314a&at=APZUnTXYm-3NVdfneACqfDLYCWko:1705563733494", + "dest": "Students_Class_Subject_Marks_gold.xlsx" + }, + "result": { + "type": 
"vm_file", + "path": "C:\\Users\\user\\Students_Class_Subject_Marks.xlsx", + "dest": "Students_Class_Subject_Marks.xlsx" + }, + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + } + ] + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/Windows/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json b/evaluation_examples/examples/Windows/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json new file mode 100644 index 0000000..6dd53fa --- /dev/null +++ b/evaluation_examples/examples/Windows/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json @@ -0,0 +1,109 @@ +{ + "id": "ecb0df7a-4e8d-4a03-b162-053391d3afaf", + "snapshot": "libreoffice_calc", + "instruction": "Enable each cell in the column\"Pass/Fail/Held\" is a drop down list", + "source": "https://www.youtube.com/shorts/tXOovKn0H68", + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1vKtnFG-sL7Ba0UCcUh4dWJDfZeAHAP6l&export=download&authuser=0&confirm=t&uuid=70a267fa-dc71-4893-8fe3-a7254e50c567&at=APZUnTVfnhipGIdrD39159Eqv9lf:1705567650653", + "path": "C:\\Users\\user\\Order_Id_Mark_Pass_Fail.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "C:\\Users\\user\\Order_Id_Mark_Pass_Fail.xlsx" + } + } + ], + "trajectory": "trajectories/ecb0df7a-4e8d-4a03-b162-053391d3afaf", + "related_apps": [ + "libreoffice calc" + ], + "evaluator": { + "postconfig": [ + { + "type": "activate_window", + "parameters": { + "window_name": "Order_Id_Mark_Pass_Fail.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + 
} + ], + "func": "compare_table", + "options": { + "rules": [ + { + "type": "sheet_data", + "sheet_idx0": 0, + "sheet_idx1": "EI0" + }, + { + "type": "data_validation", + "sheet_idx": 0, + "dv_props": [ + { + "ranges": { + "method": "spreadsheet_range", + "ref": [ + "D2:D29", + "D2:D1048576" + ] + }, + "type": { + "method": "eq", + "ref": "list" + }, + "formula1": { + "method": "str_set_eq", + "ref": [ + "Pass", + "Fail", + "Held" + ] + } + } + ] + } + ] + }, + "result": { + "type": "vm_file", + "path": "C:\\Users\\user\\Order_Id_Mark_Pass_Fail.xlsx", + "dest": "Order_Id_Mark_Pass_Fail.xlsx" + }, + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=125u7hc0LOcHXtJhOBd_Z5vt__7kwYOTQ&export=download&authuser=0&confirm=t&uuid=17e5da1b-fb1d-45d8-a9b1-6cd146ebaeee&at=APZUnTVqGyk6n5NPKzrq4KSOe871:1705898482101", + "dest": "Order_Id_Mark_Pass_Fail_gold.xlsx" + } + } +} \ No newline at end of file From 5ee9621e0dfa24face449fba6c179cc300712ab0 Mon Sep 17 00:00:00 2001 From: David Chang Date: Fri, 2 Feb 2024 05:13:12 +0800 Subject: [PATCH 5/8] ver Feb2nd human evaluation as non-expert on chrome tasks --- branch_flag | 2 +- desktop_env/controllers/setup.py | 54 +++++++++++-------- .../bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json | 10 ++++ main.py | 4 +- 4 files changed, 46 insertions(+), 24 deletions(-) diff --git a/branch_flag b/branch_flag index 760637d..9daeafb 100644 --- a/branch_flag +++ b/branch_flag @@ -1 +1 @@ -exp_som +test diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index 38646be..a29021b 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -7,6 +7,7 @@ import uuid import tempfile from typing import Any, Union, Optional from typing import Dict, List +import os import requests from pydrive.auth import GoogleAuth @@ -114,6 +115,7 @@ class SetupController: if not os.path.exists(cache_path): max_retries = 3 downloaded = False + e = None for i in 
range(max_retries): try: response = requests.get(url, stream=True) @@ -128,7 +130,7 @@ class SetupController: break except requests.RequestException as e: - logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)") + logger.error(f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)") if not downloaded: raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}") @@ -344,39 +346,49 @@ class SetupController: port = 9222 # fixme: this port is hard-coded, need to be changed from config file remote_debugging_url = f"http://{host}:{port}" - with sync_playwright() as p: + logger.info("Connect to Chrome @: %s", remote_debugging_url) + logger.debug("PLAYWRIGHT ENV: %s", repr(os.environ)) + for attempt in range(15): + if attempt>0: + time.sleep(5) + browser = None - for attempt in range(15): + with sync_playwright() as p: try: browser = p.chromium.connect_over_cdp(remote_debugging_url) - break + #break except Exception as e: if attempt < 14: logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. 
Error: {e}") - time.sleep(1) + #time.sleep(10) + continue else: logger.error(f"Failed to connect after multiple attempts: {e}") raise e - if not browser: - return + if not browser: + return - for i, url in enumerate(urls_to_open): - # Use the first context (which should be the only one if using default profile) - if i == 0: - context = browser.contexts[0] + logger.info("Opening %s...", urls_to_open) + for i, url in enumerate(urls_to_open): + # Use the first context (which should be the only one if using default profile) + if i == 0: + context = browser.contexts[0] - page = context.new_page() # Create a new page (tab) within the existing context - page.goto(url, timeout=60000) - logger.info(f"Opened tab {i + 1}: {url}") + page = context.new_page() # Create a new page (tab) within the existing context + try: + page.goto(url, timeout=60000) + except: + logger.warning("Opening %s exceeds time limit", url) # only for human test + logger.info(f"Opened tab {i + 1}: {url}") - if i == 0: - # clear the default tab - default_page = context.pages[0] - default_page.close() + if i == 0: + # clear the default tab + default_page = context.pages[0] + default_page.close() - # Do not close the context or browser; they will remain open after script ends - return browser, context + # Do not close the context or browser; they will remain open after script ends + return browser, context def _chrome_close_tabs_setup(self, urls_to_close: List[str]): time.sleep(5) # Wait for Chrome to finish launching @@ -552,4 +564,4 @@ class SetupController: else: raise NotImplementedError - return browser, context \ No newline at end of file + return browser, context diff --git a/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json b/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json index 6f2cbf5..06ffe50 100644 --- a/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json +++ 
b/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json @@ -12,6 +12,16 @@ "--remote-debugging-port=9222" ] } + }, + { + "type": "launch", + "parameters": { + "command": [ + "socat", + "tcp-listen:9222,fork", + "tcp:localhost:1337" + ] + } } ], "trajectory": "trajectories/", diff --git a/main.py b/main.py index 627ed5f..471c625 100644 --- a/main.py +++ b/main.py @@ -47,9 +47,9 @@ def human_agent(): Runs the Gym environment with human input. """ - with open("evaluation_examples/examples/thunderbird/c9e7eaf2-b1a1-4efc-a982-721972fa9f02.json", "r") as f: + with open("evaluation_examples/examples/chrome/af630914-714e-4a24-a7bb-f9af687d3b91.json", "r") as f: example = json.load(f) - example["snapshot"] = "Snapshot 30" + example["snapshot"] = "Snapshot 35" #env = DesktopEnv( path_to_vm="~/vmware/Windows 10 x64/Windows 10 x64.vmx" env = DesktopEnv( path_to_vm="/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx" From 5c6748d39ae7487f63ad9d9ae6aaa677b7d88eb4 Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Fri, 2 Feb 2024 05:33:03 +0800 Subject: [PATCH 6/8] fix error in writer --- .../66399b0d-8fda-4618-95c4-bfc6191617e9.json | 4 ++-- .../6ada715d-3aae-4a32-a6a7-429b2e43fb93.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/evaluation_examples/examples/libreoffice_writer/66399b0d-8fda-4618-95c4-bfc6191617e9.json b/evaluation_examples/examples/libreoffice_writer/66399b0d-8fda-4618-95c4-bfc6191617e9.json index 33b8a1a..ec78638 100644 --- a/evaluation_examples/examples/libreoffice_writer/66399b0d-8fda-4618-95c4-bfc6191617e9.json +++ b/evaluation_examples/examples/libreoffice_writer/66399b0d-8fda-4618-95c4-bfc6191617e9.json @@ -1,7 +1,7 @@ { "id": "66399b0d-8fda-4618-95c4-bfc6191617e9", "snapshot": "libreoffice_writer", - "instruction": "Could you help me insert a 7*5 empty table at the point of cursor?", + "instruction": "Could you help me insert a 7(columns)*5(rows) empty table at the point of cursor?", "source": 
"https://www.youtube.com/watch?v=l25Evu4ohKg", "config": [ { @@ -27,7 +27,7 @@ "command": [ "python", "-c", - "import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=40, interval=10); time.sleep(1); pyautogui.scroll(-2)" + "import pyautogui; import time; pyautogui.press(\"down\", presses=40, interval=0.01); time.sleep(1); pyautogui.scroll(-2)" ] } } diff --git a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json index 7151032..9c368bf 100644 --- a/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json +++ b/evaluation_examples/examples/libreoffice_writer/6ada715d-3aae-4a32-a6a7-429b2e43fb93.json @@ -38,7 +38,7 @@ "command": [ "python", "-c", - "import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=8, interval=3); time.sleep(1); pyautogui.scroll(-2)" + "import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)" ] } } @@ -81,7 +81,7 @@ }, "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1xbhlfqGrPutHHi2aHg66jwXD-yaZpe9j&export=download&authuser=0&confirm=t&uuid=427765e0-3f97-4a72-92db-a1fe7cdde73b&at=APZUnTUhNLh2PDu4OGkCVQW-LPCd:1704173991269", + "path": "https://drive.usercontent.google.com/download?id=1xbhlfqGrPutHHi2aHg66jwXD-yaZpe9j&export=download&authuser=0&confirm=t&uuid=802d477e-d97b-4641-84fb-9eaf8805c35c&at=APZUnTWS0KOqHCPnufPJfDEfGE2u:1706822844322", "dest": "Viewing_Your_Class_Schedule_and_Textbooks_Gold.docx" } } From 6d792e594edf9fc626aa2aa6ecbfd178e952a760 Mon Sep 17 00:00:00 2001 From: David Chang Date: Fri, 2 Feb 2024 07:20:04 +0800 Subject: [PATCH 7/8] ver Feb2nd human evaluation as non-expert on vlc --- desktop_env/evaluators/README.md | 4 ++-- .../examples/vlc/8f080098-ddb1-424c-b438-4e96e5e4786e.json | 2 +- 
main.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/desktop_env/evaluators/README.md b/desktop_env/evaluators/README.md index d7a746c..232cb0b 100644 --- a/desktop_env/evaluators/README.md +++ b/desktop_env/evaluators/README.md @@ -191,7 +191,7 @@ To enable and use the HTTP interface in VLC Media Player for remote control and #### 4. Configure Lua HTTP - Expand the `Main interfaces` node and select `Lua`. -- Under `Lua HTTP`, set a password in the `Lua HTTP` section. This password will be required to access the HTTP interface. +- Under `Lua HTTP`, set a password `password` in the `Lua HTTP` section. This password will be required to access the HTTP interface. #### 5. Save and Restart VLC @@ -217,4 +217,4 @@ pip install opencv-python-headless Pillow imagehash - If the port is in use by another application, you may change the port number in VLC's settings. ## GIMP -Click on the "Keep" of the image loading pop-up. \ No newline at end of file +Click on the "Keep" of the image loading pop-up. diff --git a/evaluation_examples/examples/vlc/8f080098-ddb1-424c-b438-4e96e5e4786e.json b/evaluation_examples/examples/vlc/8f080098-ddb1-424c-b438-4e96e5e4786e.json index 7b521e8..2a06829 100644 --- a/evaluation_examples/examples/vlc/8f080098-ddb1-424c-b438-4e96e5e4786e.json +++ b/evaluation_examples/examples/vlc/8f080098-ddb1-424c-b438-4e96e5e4786e.json @@ -1,7 +1,7 @@ { "id": "8f080098-ddb1-424c-b438-4e96e5e4786e", "snapshot": "base_setup", - "instruction": "Could you download the song from this music video and save it as an MP3 file? I'd like to have it on my device to play whenever I want. Please title the file \"Baby Justin Bieber.mp3.\" I really appreciate your help!", + "instruction": "Could you download the song from this music video and save it as an MP3 file? I'd like to have it on my device to play whenever I want. 
Please save the file just on the desktop and title the file \"Baby Justin Bieber.mp3.\" I really appreciate your help!", "source": "https://medium.com/@jetscribe_ai/how-to-extract-mp3-audio-from-videos-using-vlc-media-player-beeef644ebfb", "config": [ { diff --git a/main.py b/main.py index 471c625..9f0b324 100644 --- a/main.py +++ b/main.py @@ -47,9 +47,9 @@ def human_agent(): Runs the Gym environment with human input. """ - with open("evaluation_examples/examples/chrome/af630914-714e-4a24-a7bb-f9af687d3b91.json", "r") as f: + with open("evaluation_examples/examples/vlc/215dfd39-f493-4bc3-a027-8a97d72c61bf.json", "r") as f: example = json.load(f) - example["snapshot"] = "Snapshot 35" + example["snapshot"] = "Snapshot 36" #env = DesktopEnv( path_to_vm="~/vmware/Windows 10 x64/Windows 10 x64.vmx" env = DesktopEnv( path_to_vm="/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx" From 3184f091a85fc3f0629ba25fe3a74cc198a448be Mon Sep 17 00:00:00 2001 From: rhythmcao Date: Fri, 2 Feb 2024 07:24:26 +0800 Subject: [PATCH 8/8] fix error in long video recording in server/main.py --- desktop_env/server/main.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index a1e57e7..e66ddd4 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -2,7 +2,7 @@ import ctypes import os import platform import shlex -import subprocess +import subprocess, signal from pathlib import Path from typing import Any, Optional from typing import List, Dict, Tuple @@ -997,7 +997,7 @@ def start_recording(): start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}" - recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.PIPE, stderr=subprocess.PIPE) + recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) return jsonify({'status': 'success', 
'message': 'Started recording.'}) @@ -1009,10 +1009,8 @@ def end_recording(): if not recording_process: return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400 - recording_process.terminate() + recording_process.send_signal(signal.SIGINT) recording_process.wait() - # return_code = recording_process.returncode - output, error = recording_process.communicate() recording_process = None # return recording video file