From 4070b41fbdea3e53f11d1c7aa39e4e4427e40bda Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Fri, 8 Mar 2024 20:36:34 +0800 Subject: [PATCH] fix multi apps --- desktop_env/evaluators/getters/__init__.py | 2 +- desktop_env/evaluators/getters/general.py | 16 ++++++ .../68a25bd4-59c7-4f4d-975e-da0c8509c848.json | 12 +---- .../69acbb55-d945-4927-a87b-8480e1a5bb7e.json | 9 ++-- .../74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json | 49 ++++++++++++++----- .../869de13e-bef9-4b91-ba51-f6708c40b096.json | 6 +-- .../acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json | 31 +++++++----- .../b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 13 +++-- .../c7c1e4c3-9e92-4eba-a4b8-689953975ea4.json | 8 --- .../da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json | 2 +- .../f918266a-b3e0-4914-865d-4faa564f1aef.json | 6 +-- 11 files changed, 96 insertions(+), 58 deletions(-) diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py index 958d98d..a4d8782 100644 --- a/desktop_env/evaluators/getters/__init__.py +++ b/desktop_env/evaluators/getters/__init__.py @@ -27,7 +27,7 @@ from .chrome import ( get_info_from_website ) from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file -from .general import get_vm_command_line, get_vm_terminal_output +from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command_error from .gimp import get_gimp_config_file from .impress import get_audio_in_slide from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory diff --git a/desktop_env/evaluators/getters/general.py b/desktop_env/evaluators/getters/general.py index a34d981..81ad69b 100644 --- a/desktop_env/evaluators/getters/general.py +++ b/desktop_env/evaluators/getters/general.py @@ -21,6 +21,22 @@ def get_vm_command_line(env, config: Dict[str, str]): logger.error("Failed to get vm command line. Status code: %d", response.status_code) return None +def get_vm_command_error(env, config: Dict[str, str]): + vm_ip = env.vm_ip + port = 5000 + command = config["command"] + shell = config.get("shell", False) + + response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command, "shell": shell}) + + print(response.json()) + + if response.status_code == 200: + return response.json()["error"] + else: + logger.error("Failed to get vm command line error. Status code: %d", response.status_code) + return None + def get_vm_terminal_output(env, config: Dict[str, str]): return env.controller.get_terminal_output() diff --git a/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json b/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json index ec48118..b115c1f 100644 --- a/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json +++ b/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1g2lhX7CtuT6kc2Bmss0WU5zT1eXnnbEU&export=download&authuser=0&confirm=t&uuid=556ab51e-f4b7-4140-b38e-f93c0219f43a&at=APZUnTWsI26GRLwtp-r3_m9qNZCd:1709791386045", + "url": "https://drive.usercontent.google.com/download?id=1g2lhX7CtuT6kc2Bmss0WU5zT1eXnnbEU&export=download&authuser=0&confirm=t&uuid=37232f45-fedf-46c1-a292-f1080eb8c281&at=APZUnTV8C-zCWkPmRAc3BpiKz1uo:1709894768912", "path": "/home/user/Desktop/rsc-ebook-collection-2023.xlsx" } ] @@ -40,14 +40,6 @@ ] } }, - { - "type": "chrome_open_tabs", - "parameters": { - "urls_to_open": [ - "https://www.qatarairways.com/en-hk/homepage.html" - ] - } - }, { "type": "activate_window", "parameters": { @@ -55,7 +47,7 @@ } } ], - "trajectory": "trajectories/", + "trajectory": "trajectories/68a25bd4-59c7-4f4d-975e-da0c8509c848", "related_apps": [ "libreoffice_calc", "chrome" diff --git a/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json b/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json index ef316d9..9a2f5ae 100644 --- a/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json +++ b/evaluation_examples/examples/multi_apps/69acbb55-d945-4927-a87b-8480e1a5bb7e.json @@ -31,9 +31,9 @@ "vscode" ], "evaluator": { - "func": "exact_match", + "func": "check_include_exclude", "result": { - "type": "vm_command_line", + "type": "vm_command_error", "command": [ "python", "-c", @@ -43,7 +43,10 @@ "expected": { "type": "rule", "rules": { - "expected": "" + "include": [], + "exclude": [ + "ModuleNotFoundError: No module named" + ] } } } diff --git a/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json index e3908bf..535b8ba 100644 --- a/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json +++ b/evaluation_examples/examples/multi_apps/74d5859f-ed66-4d3e-aa0e-93d7a592ce41.json @@ -34,13 +34,20 @@ { "type": "execute", "parameters": { - "command": ["mkdir", "-p", "/home/user/Projects"] + "command": [ + "mkdir", + "-p", + "/home/user/Projects" + ] } }, { "type": "launch", "parameters": { - "command": ["nautilus", "/home/user/Projects"] + "command": [ + "nautilus", + "/home/user/Projects" + ] } } ], @@ -90,32 +97,52 @@ "rules": { "expect": [ { - "key": ["name"], + "key": [ + "name" + ], "method": "eq", "ref": "happy-extension" }, { - "key": ["version"], + "key": [ + "version" + ], "method": "eq", "ref": "0.0.1" }, { - "key": ["background", "scripts"], + "key": [ + "background", + "scripts" + ], "method": "eq", - "ref": ["background_script.js"] + "ref": [ + "background_script.js" + ] }, { - "key": ["browser_action", "default_icon"], + "key": [ + "browser_action", + "default_icon" + ], "method": "eq", - "ref": {"64": "icons/icon.png"} + "ref": { + "64": "icons/icon.png" + } }, { - "key": ["browser_action", "default_popup"], + "key": [ + "browser_action", + "default_popup" + ], "method": "eq", "ref": "browserAction/index.html" }, { - "key": ["browser_action", "default_title"], + "key": [ + "browser_action", + "default_title" + ], "method": "eq", "ref": "happy-extension" } @@ -144,4 +171,4 @@ } ] } -} +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json b/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json index 8fe7df9..a208df3 100644 --- a/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json +++ b/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json @@ -74,10 +74,6 @@ "url": "https://drive.usercontent.google.com/download?id=110FbG5m_XDDUpO3bvUZTPzw9y0ce1KB5&export=download&authuser=0&confirm=t&uuid=cbce5e69-d4dd-4508-8192-647d6da54fc0&at=APZUnTWxxAS53KTZJklxT6riooU6:1709357558943", "path": "/home/user/Desktop/IA_Format.docx" }, - { - "url": "https://drive.usercontent.google.com/download?id=1O1yd3Dm_nKEtknQYzDAHqPllv0BeQ2bp&export=download&authuser=0&confirm=t&uuid=636ee37e-4ee1-4415-ba71-73e42335c4bf&at=APZUnTW7tJhD6LG7CglaqI_y0Exu:1709357564381", - "path": "/home/user/Desktop/S1578219008703378.pdf" - }, { "url": "https://drive.usercontent.google.com/download?id=1WA-QneFJF9JJ_KKs9ksZ8N7ggkhRIt6Y&export=download&authuser=0&confirm=t&uuid=3fbc694a-8470-4a8f-a6e9-9d64d94b3a5e&at=APZUnTXi4PtZ42zRYDj-VGbqH2Ts:1709793215203", "path": "/home/user/Desktop/paper01.pdf" @@ -164,7 +160,7 @@ { "type": "rule", "rules": { - "expected": "07-cluster-kMean (1).ppt\n2023_validation_7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx\nassignment_mark_frontpage.docx\ncco-return-to-school-survey-underlying-data-tables.xlsx\nDOC_2480903712718068684.pdf\nFamily Status Equality-Eng (Aug 2021).pdf\nIA_Format.docx\nS1578219008703378.pdf\n" + "expected": "07-cluster-kMean (1).ppt\n2023_validation_7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx\nassignment_mark_frontpage.docx\ncco-return-to-school-survey-underlying-data-tables.xlsx\nDOC_2480903712718068684.pdf\nFamily Status Equality-Eng (Aug 2021).pdf\nIA_Format.docx\n" } } ] diff --git a/evaluation_examples/examples/multi_apps/acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json b/evaluation_examples/examples/multi_apps/acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json index a816523..24034a8 100644 --- a/evaluation_examples/examples/multi_apps/acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json +++ b/evaluation_examples/examples/multi_apps/acb0f96b-e27c-44d8-b55f-7cb76609dfcd.json @@ -30,20 +30,25 @@ "chrome" ], "evaluator": { - "func": "is_extension_installed", - "result": { - "type": "vm_command_line", - "command": [ - "ls", - "/home/user/" - ] - }, - "expected": { - "type": "rule", - "rules": { - "type": "contain", - "expected": "instructor-embedding" + "postconfig": [ + { + "type": "execute", + "parameters": { + "command": "cd /home/user && ls -R instructor-embedding/ > log.txt", + "shell": true + } } + ], + "func": "compare_text_file", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=1aWWgwZFhsT1ByRbZkofO1Hf9zWf2Df7S&export=download&authuser=0&confirm=t&uuid=b3643d9a-5e1f-4eaf-9a78-887aebda7cee&at=APZUnTU3vX5b_NQpZW8q3nlbIo7p:1709898581495", + "dest": "log_Gold.txt" + }, + "result": { + "type": "vm_file", + "path": "/home/user/log.txt", + "dest": "log.txt" } } } \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json index 5d84224..4ff2de2 100644 --- a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json +++ b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json @@ -7,7 +7,11 @@ { "type": "command", "parameters": { - "command": ["mkdir", "-p", "/home/user/Documents/Papers"] + "command": [ + "mkdir", + "-p", + "/home/user/Documents/Papers" + ] } }, { @@ -36,7 +40,10 @@ { "type": "execute", "parameters": { - "command": ["nautilus", "/home/user/Documents/Papers"] + "command": [ + "nautilus", + "/home/user/Documents/Papers" + ] } } ], @@ -97,4 +104,4 @@ ] } } -} +} \ No newline at end of file diff --git a/evaluation_examples/examples/multi_apps/c7c1e4c3-9e92-4eba-a4b8-689953975ea4.json b/evaluation_examples/examples/multi_apps/c7c1e4c3-9e92-4eba-a4b8-689953975ea4.json index ff71562..e9bab53 100644 --- a/evaluation_examples/examples/multi_apps/c7c1e4c3-9e92-4eba-a4b8-689953975ea4.json +++ b/evaluation_examples/examples/multi_apps/c7c1e4c3-9e92-4eba-a4b8-689953975ea4.json @@ -23,14 +23,6 @@ ] } }, - { - "type": "chrome_open_tabs", - "parameters": { - "urls_to_open": [ - "https://www.qatarairways.com/en-hk/homepage.html" - ] - } - }, { "type": "download", "parameters": { diff --git a/evaluation_examples/examples/multi_apps/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json b/evaluation_examples/examples/multi_apps/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json index 3fc1778..689189d 100644 --- a/evaluation_examples/examples/multi_apps/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json +++ b/evaluation_examples/examples/multi_apps/da52d699-e8d2-4dc5-9191-a2199e0b6a9b.json @@ -1,7 +1,7 @@ { "id": "da52d699-e8d2-4dc5-9191-a2199e0b6a9b", "snapshot": "libreoffice_calc", - "instruction": "Examine the spreadsheet on the desktop, which contains a record of books read in 2022. Identify the book with the slowest reading pace, measured in words per day. I have an empty document named 'book_list_result.docx' on the desktop; please open it and record the title there.", + "instruction": "Examine the spreadsheet on the desktop, which contains a record of books read in 2022. Take the website https://howlongtoread.com/ as a reference to identify the book with the slowest reading pace, measured in words per day. I have an empty document named 'book_list_result.docx' on the desktop; please open it and record the title there.", "source": "GAIA", "config": [ { diff --git a/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json b/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json index 001b035..0a7673f 100644 --- a/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json +++ b/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json @@ -1,7 +1,7 @@ { "id": "f918266a-b3e0-4914-865d-4faa564f1aef", "snapshot": "vscode", - "instruction": "Please finalize the code and retrieve the output from the Python script 'calculator.py' located on the desktop and save it as 'log.txt' in the same directory as the Python file.", + "instruction": "Please complete the code and retrieve the output from the Python script 'calculator.py' located on the desktop and save it as 'log.txt' in the same directory as the Python file.", "source": "GAIA", "config": [ { @@ -35,8 +35,8 @@ "func": "compare_text_file", "expected": { "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1-14AgA1nHNL22VD_3QtRzWaMjIBa3RvJ&export=download&authuser=0&confirm=t&uuid=91e2d5bc-a7f0-4abc-9aed-aa8b4956fb45&at=APZUnTU2TksSVngTw6WRJv8wpmoU:1709367652463", - "dest": "log.txt" + "path": "https://drive.usercontent.google.com/download?id=1-14AgA1nHNL22VD_3QtRzWaMjIBa3RvJ&export=download&authuser=0&confirm=t&uuid=6aa05bf1-4964-4f7b-8983-d28540b4053b&at=APZUnTXuJgDHIYA2FZl3A_OQJEOF:1709881263131", + "dest": "log_Gold.txt" }, "result": { "type": "vm_file",