From 46b99147e9c396a2a590c20cf0dad36d32b719a4 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 31 Jan 2024 10:56:24 +0800 Subject: [PATCH 1/5] ver Jan31stv2 added a warning comment about expanding tildes in command parameter of subprocess.xxx --- desktop_env/server/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index 5cf7ae1..a1e57e7 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -56,7 +56,6 @@ def execute_command(): if isinstance(command, str) and not shell: command = shlex.split(command) - # WARNING: auto expansion should be completed by set `shell=True` and use str as `command` # Expand user directory for i, arg in enumerate(command): if arg.startswith("~/"): @@ -99,7 +98,6 @@ def launch_app(): if isinstance(command, str) and not shell: command = shlex.split(command) - # WARNING: auto expansion should be completed by set `shell=True` and use str as `command` # Expand user directory for i, arg in enumerate(command): if arg.startswith("~/"): From 29f2f3eaf832e895f681825dcea358367f9a3a20 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 31 Jan 2024 11:11:23 +0800 Subject: [PATCH 2/5] ver Jan31stv3 started to run SoM experiments on os tasks --- branch_flag | 2 +- experiment_screenshot_som.py | 25 +++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/branch_flag b/branch_flag index 9daeafb..760637d 100644 --- a/branch_flag +++ b/branch_flag @@ -1 +1 @@ -test +exp_som diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py index 8247ad8..3cb4537 100644 --- a/experiment_screenshot_som.py +++ b/experiment_screenshot_som.py @@ -153,7 +153,28 @@ def main(example_class, example_id): if __name__ == '__main__': - xx_list = [ "35253b65-1c19-4304-8aa4-6884b8218fc0" + xx_list = [ "94d95f96-9699-4208-98ba-3c3119edf9c2" + , "bedcedc4-4d72-425e-ad62-21960b11fe0d" + , "43c2d64c-bab5-4dcb-a30c-b888321c319a" + , "7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82" + , "ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3" + , "a462a795-fdc7-4b23-b689-e8b6df786b78" + , "f9be0997-4b7c-45c5-b05c-4612b44a6118" + , "ae039631-2b12-4637-84f6-c67d51511be3" + , "e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15" + , "28cc3b7e-b194-4bc9-8353-d04c0f4d56d2" + , "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57" + , "e0df059f-28a6-4169-924f-b9623e7184cc" + , "ddc75b62-7311-4af8-bfb3-859558542b36" + , "5c433d22-ed9a-4e31-91f5-54cf3e8acd63" + , "b6781586-6346-41cd-935a-a6b1487918fc" + , "b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa" + , "3ce045a0-877b-42aa-8d2c-b4a863336ab8" + , "fe41f596-a71b-4c2f-9b2f-9dcd40b568c3" + , "a4d98375-215b-4a4d-aee9-3d4370fccc41" + , "765d2b74-88a7-4d50-bf51-34e4106fd24a" + , "13584542-872b-42d8-b299-866967b5c3ef" + , "23393935-50c7-4a86-aeea-2b78fd089c5c" ] for example_id in xx_list: - main("thunderbird", example_id) + main("os", example_id) From 8a62d96fd35c04113462a5fe705d2e77b40c491a Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 31 Jan 2024 16:22:26 +0800 Subject: [PATCH 3/5] ver Jan31stv4 evaluating som on calc --- experiment_screenshot_som.py | 41 ++++++++++++++++++++++++++++-------- test_connection.py | 39 ++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) create mode 100644 test_connection.py diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py index 3cb4537..8fbcb16 100644 --- a/experiment_screenshot_som.py +++ b/experiment_screenshot_som.py @@ -135,6 +135,8 @@ def main(example_class, example_id): # example["snapshot"] = "exp_setup4" example["snapshot"] = "Snapshot 30" + logger.info("TASK: %s/%s", example_class, example_id) + api_key = os.environ.get("OPENAI_API_KEY") agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, max_tokens=1000, instruction=example['instruction'], action_space=action_space, exp="som") @@ -158,23 +160,44 @@ if __name__ == '__main__': , "43c2d64c-bab5-4dcb-a30c-b888321c319a" , "7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82" , "ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3" - , "a462a795-fdc7-4b23-b689-e8b6df786b78" , "f9be0997-4b7c-45c5-b05c-4612b44a6118" - , "ae039631-2b12-4637-84f6-c67d51511be3" - , "e2eb4bf1-aa93-4192-b55d-03e2fb6dfd15" , "28cc3b7e-b194-4bc9-8353-d04c0f4d56d2" , "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57" , "e0df059f-28a6-4169-924f-b9623e7184cc" , "ddc75b62-7311-4af8-bfb3-859558542b36" - , "5c433d22-ed9a-4e31-91f5-54cf3e8acd63" , "b6781586-6346-41cd-935a-a6b1487918fc" - , "b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa" , "3ce045a0-877b-42aa-8d2c-b4a863336ab8" - , "fe41f596-a71b-4c2f-9b2f-9dcd40b568c3" , "a4d98375-215b-4a4d-aee9-3d4370fccc41" - , "765d2b74-88a7-4d50-bf51-34e4106fd24a" , "13584542-872b-42d8-b299-866967b5c3ef" , "23393935-50c7-4a86-aeea-2b78fd089c5c" + # 15, ^ os, v calc + , "eb03d19a-b88d-4de4-8a64-ca0ac66f426b" + , "0bf05a7d-b28b-44d2-955a-50b41e24012a" + , "7a4e4bc8-922c-4c84-865c-25ba34136be1" + , "a9f325aa-8c05-4e4f-8341-9e4358565f4f" + , "ecb0df7a-4e8d-4a03-b162-053391d3afaf" + , "7efeb4b1-3d19-4762-b163-63328d66303b" + , "4e6fcf72-daf3-439f-a232-c434ce416af6" + , "6054afcb-5bab-4702-90a0-b259b5d3217c" + , "abed40dc-063f-4598-8ba5-9fe749c0615d" + , "01b269ae-2111-4a07-81fd-3fcd711993b0" + , "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14" + , "0cecd4f3-74de-457b-ba94-29ad6b5dafb6" + , "4188d3a4-077d-46b7-9c86-23e1a036f6c1" + , "51b11269-2ca8-4b2a-9163-f21758420e78" + , "7e429b8d-a3f0-4ed0-9b58-08957d00b127" + , "347ef137-7eeb-4c80-a3bb-0951f26a8aff" + , "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5" + , "3aaa4e37-dc91-482e-99af-132a612d40f3" + , "37608790-6147-45d0-9f20-1137bb35703d" + , "f9584479-3d0d-4c79-affa-9ad7afdd8850" + , "d681960f-7bc3-4286-9913-a8812ba3261a" + , "21df9241-f8d7-4509-b7f1-37e501a823f7" + , "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17" + , "357ef137-7eeb-4c80-a3bb-0951f26a8aff" + , "aa3a8974-2e85-438b-b29e-a64df44deb4b" + , "a01fbce3-2793-461f-ab86-43680ccbae25" + , "4f07fbe9-70de-4927-a4d5-bb28bc12c52c" ] - for example_id in xx_list: - main("os", example_id) + for example_id in xx_list[16:]: + main("libreoffice_calc", example_id) diff --git a/test_connection.py b/test_connection.py new file mode 100644 index 0000000..6f58e63 --- /dev/null +++ b/test_connection.py @@ -0,0 +1,39 @@ +#import openai +#import yaml +#from typing import Dict +import os +import requests + +# with open("openaiconfig.yaml") as f: +# config: Dict[str, str] = yaml.load(f, Loader=yaml.Loader) +# openai.api_key = config["api_key"] + +#with open("llmcases/debug-20230420@191814.log.api_version.-1") as f: + #prompt = f.read() +#prompt = "Hello, " +prompt = [ { "role": "user" + , "content": "Hello," + } + ] + +api_key = os.environ.get("OPENAI_API_KEY") +payload = { "model": "gpt-4-vision-preview" + , "messages": prompt + , "max_tokens": 100 + } +headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {api_key}" +} + +response = requests.post( + "https://api.openai.com/v1/chat/completions", + headers=headers, + json=payload +) + +#completion = openai.Completion.create( model="gpt-4-vision-preview" + #, prompt=prompt + #, request_timeout=20. + #) +print(response.json()) From 3dce3ffe63df1fc406ce35e2b1e0e62ce2591625 Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 31 Jan 2024 16:46:26 +0800 Subject: [PATCH 4/5] ver Jan31stv5 parts of calc experiments --- experiment_screenshot_som.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/experiment_screenshot_som.py b/experiment_screenshot_som.py index 8fbcb16..24f8fd7 100644 --- a/experiment_screenshot_som.py +++ b/experiment_screenshot_som.py @@ -199,5 +199,5 @@ if __name__ == '__main__': , "a01fbce3-2793-461f-ab86-43680ccbae25" , "4f07fbe9-70de-4927-a4d5-bb28bc12c52c" ] - for example_id in xx_list[16:]: + for example_id in xx_list[18:]: main("libreoffice_calc", example_id) From 4897211a46aafa241baad9c995c65bf753c2ab0b Mon Sep 17 00:00:00 2001 From: David Chang Date: Wed, 31 Jan 2024 22:55:47 +0800 Subject: [PATCH 5/5] ver Jan31stv6 finished calc human evaluation updated calc configs with an extra sleep to guarantee the integrity of downloaded xlsx file --- branch_flag | 2 +- desktop_env/evaluators/metrics/table.py | 16 ++--- ...269ae-2111-4a07-81fd-3fcd711993b0-win.json | 6 ++ .../01b269ae-2111-4a07-81fd-3fcd711993b0.json | 6 ++ .../0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json | 6 ++ .../1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json | 6 ++ .../21df9241-f8d7-4509-b7f1-37e501a823f7.json | 6 ++ .../347ef137-7eeb-4c80-a3bb-0951f26a8aff.json | 6 ++ .../357ef137-7eeb-4c80-a3bb-0951f26a8aff.json | 6 ++ .../37608790-6147-45d0-9f20-1137bb35703d.json | 6 ++ .../4188d3a4-077d-46b7-9c86-23e1a036f6c1.json | 6 ++ .../4e6fcf72-daf3-439f-a232-c434ce416af6.json | 62 ++++++++++++++++++- .../4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json | 6 ++ .../51b11269-2ca8-4b2a-9163-f21758420e78.json | 6 ++ .../6054afcb-5bab-4702-90a0-b259b5d3217c.json | 6 ++ .../6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json | 6 ++ .../7a4e4bc8-922c-4c84-865c-25ba34136be1.json | 6 ++ .../7e429b8d-a3f0-4ed0-9b58-08957d00b127.json | 6 ++ .../7efeb4b1-3d19-4762-b163-63328d66303b.json | 6 ++ .../8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json | 6 ++ .../a01fbce3-2793-461f-ab86-43680ccbae25.json | 8 ++- .../a9f325aa-8c05-4e4f-8341-9e4358565f4f.json | 6 ++ .../abed40dc-063f-4598-8ba5-9fe749c0615d.json | 6 ++ .../d681960f-7bc3-4286-9913-a8812ba3261a.json | 6 ++ .../eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json | 6 ++ .../ecb0df7a-4e8d-4a03-b162-053391d3afaf.json | 6 ++ .../f9584479-3d0d-4c79-affa-9ad7afdd8850.json | 6 ++ main.py | 7 ++- 28 files changed, 216 insertions(+), 17 deletions(-) diff --git a/branch_flag b/branch_flag index 760637d..9daeafb 100644 --- a/branch_flag +++ b/branch_flag @@ -1 +1 @@ -exp_som +test diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 4e8bac6..294e2e5 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -392,17 +392,13 @@ if __name__ == '__main__': logger.addHandler(stdout_handler) logger.addHandler(sdebug_handler) - path1 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number.xlsx" - path2 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number_gold.xlsx" - rules = [{"type": "check_cell" - , "sheet_idx": 0 - , "coordinate": "E3" - , "props": {"value": {"method": "approx:0.001" - , "ref": 191.6667 - } - } + path1 = "snapshots/test/cache/4e6fcf72-daf3-439f-a232-c434ce416af6/Employee_Age_By_Birthday.xlsx" + path2 = "snapshots/test/cache/4e6fcf72-daf3-439f-a232-c434ce416af6/Employee_Age_By_Birthday_gold.xlsx" + rules = [ { "type": "sheet_data" + , "sheet_idx0": 0 + , "sheet_idx1": "EI0" } - ] + ] print(compare_table(path1, path2 , rules=rules ) diff --git a/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0-win.json b/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0-win.json index dfa3dcb..355c776 100644 --- a/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0-win.json +++ b/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0-win.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json b/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json index 3ae596d..2dfd0d2 100644 --- a/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json +++ b/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json b/evaluation_examples/examples/libreoffice_calc/0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json index e12377c..97ad2a1 100644 --- a/evaluation_examples/examples/libreoffice_calc/0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json +++ b/evaluation_examples/examples/libreoffice_calc/0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json b/evaluation_examples/examples/libreoffice_calc/1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json index 08b9f8f..1ee0683 100644 --- a/evaluation_examples/examples/libreoffice_calc/1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json +++ b/evaluation_examples/examples/libreoffice_calc/1334ca3e-f9e3-4db8-9ca7-b4c653be7d17.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json b/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json index 199bc74..e55f7cb 100644 --- a/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json +++ b/evaluation_examples/examples/libreoffice_calc/21df9241-f8d7-4509-b7f1-37e501a823f7.json @@ -69,6 +69,12 @@ "/home/user/Represent_in_millions_billions.xlsx" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json index 9115022..11a1c4b 100644 --- a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json +++ b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/357ef137-7eeb-4c80-a3bb-0951f26a8aff.json b/evaluation_examples/examples/libreoffice_calc/357ef137-7eeb-4c80-a3bb-0951f26a8aff.json index 544a464..58ec20a 100644 --- a/evaluation_examples/examples/libreoffice_calc/357ef137-7eeb-4c80-a3bb-0951f26a8aff.json +++ b/evaluation_examples/examples/libreoffice_calc/357ef137-7eeb-4c80-a3bb-0951f26a8aff.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/37608790-6147-45d0-9f20-1137bb35703d.json b/evaluation_examples/examples/libreoffice_calc/37608790-6147-45d0-9f20-1137bb35703d.json index 1e37657..ab6d275 100644 --- a/evaluation_examples/examples/libreoffice_calc/37608790-6147-45d0-9f20-1137bb35703d.json +++ b/evaluation_examples/examples/libreoffice_calc/37608790-6147-45d0-9f20-1137bb35703d.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/4188d3a4-077d-46b7-9c86-23e1a036f6c1.json b/evaluation_examples/examples/libreoffice_calc/4188d3a4-077d-46b7-9c86-23e1a036f6c1.json index 7b77db3..1e8d045 100644 --- a/evaluation_examples/examples/libreoffice_calc/4188d3a4-077d-46b7-9c86-23e1a036f6c1.json +++ b/evaluation_examples/examples/libreoffice_calc/4188d3a4-077d-46b7-9c86-23e1a036f6c1.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/4e6fcf72-daf3-439f-a232-c434ce416af6.json b/evaluation_examples/examples/libreoffice_calc/4e6fcf72-daf3-439f-a232-c434ce416af6.json index 65df118..a047e10 100644 --- a/evaluation_examples/examples/libreoffice_calc/4e6fcf72-daf3-439f-a232-c434ce416af6.json +++ b/evaluation_examples/examples/libreoffice_calc/4e6fcf72-daf3-439f-a232-c434ce416af6.json @@ -50,12 +50,70 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.usercontent.google.com/download?id=1sRI72UGfHiVBRFuV4uwhr173u3Sf46Y6&export=download&authuser=0&confirm=t&uuid=90da5e2b-39c0-449d-b753-09dfed73b509&at=APZUnTVFInccKo2QB9JNnIidFfG3:1705909465173", + "path": "/home/user/Employee_Age_By_Birthday_gold.xlsx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "/home/user/Employee_Age_By_Birthday_gold.xlsx" + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + }, + { + "type": "activate_window", + "parameters": { + "window_name": "Employee_Age_By_Birthday_gold.xlsx - LibreOffice Calc", + "strict": true + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", "expected": { - "type": "cloud_file", - "path": "https://drive.usercontent.google.com/download?id=1sRI72UGfHiVBRFuV4uwhr173u3Sf46Y6&export=download&authuser=0&confirm=t&uuid=90da5e2b-39c0-449d-b753-09dfed73b509&at=APZUnTVFInccKo2QB9JNnIidFfG3:1705909465173", + "type": "vm_file", + "path": "/home/user/Employee_Age_By_Birthday_gold.xlsx", "dest": "Employee_Age_By_Birthday_gold.xlsx" }, "result": { diff --git a/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json b/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json index 0e8b6d8..b910327 100644 --- a/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json +++ b/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json @@ -69,6 +69,12 @@ "/home/user/Padding_Decimals_In_Formular.xlsx" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/51b11269-2ca8-4b2a-9163-f21758420e78.json b/evaluation_examples/examples/libreoffice_calc/51b11269-2ca8-4b2a-9163-f21758420e78.json index b89606d..80b2167 100644 --- a/evaluation_examples/examples/libreoffice_calc/51b11269-2ca8-4b2a-9163-f21758420e78.json +++ b/evaluation_examples/examples/libreoffice_calc/51b11269-2ca8-4b2a-9163-f21758420e78.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/6054afcb-5bab-4702-90a0-b259b5d3217c.json b/evaluation_examples/examples/libreoffice_calc/6054afcb-5bab-4702-90a0-b259b5d3217c.json index 833a9cb..460b4ca 100644 --- a/evaluation_examples/examples/libreoffice_calc/6054afcb-5bab-4702-90a0-b259b5d3217c.json +++ b/evaluation_examples/examples/libreoffice_calc/6054afcb-5bab-4702-90a0-b259b5d3217c.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json b/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json index c985a2e..18b1da1 100644 --- a/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json +++ b/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json @@ -69,6 +69,12 @@ "/home/user/Keep_Two_decimal_points.xlsx" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/7a4e4bc8-922c-4c84-865c-25ba34136be1.json b/evaluation_examples/examples/libreoffice_calc/7a4e4bc8-922c-4c84-865c-25ba34136be1.json index 7bf9cef..99133d0 100644 --- a/evaluation_examples/examples/libreoffice_calc/7a4e4bc8-922c-4c84-865c-25ba34136be1.json +++ b/evaluation_examples/examples/libreoffice_calc/7a4e4bc8-922c-4c84-865c-25ba34136be1.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/7e429b8d-a3f0-4ed0-9b58-08957d00b127.json b/evaluation_examples/examples/libreoffice_calc/7e429b8d-a3f0-4ed0-9b58-08957d00b127.json index c06cde7..48998ef 100644 --- a/evaluation_examples/examples/libreoffice_calc/7e429b8d-a3f0-4ed0-9b58-08957d00b127.json +++ b/evaluation_examples/examples/libreoffice_calc/7e429b8d-a3f0-4ed0-9b58-08957d00b127.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/7efeb4b1-3d19-4762-b163-63328d66303b.json b/evaluation_examples/examples/libreoffice_calc/7efeb4b1-3d19-4762-b163-63328d66303b.json index 43fcb6b..fe89bcc 100644 --- a/evaluation_examples/examples/libreoffice_calc/7efeb4b1-3d19-4762-b163-63328d66303b.json +++ b/evaluation_examples/examples/libreoffice_calc/7efeb4b1-3d19-4762-b163-63328d66303b.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json b/evaluation_examples/examples/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json index 2c115d0..bb95f4c 100644 --- a/evaluation_examples/examples/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json +++ b/evaluation_examples/examples/libreoffice_calc/8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json index ce5b308..3db1a03 100644 --- a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json +++ b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json @@ -1,7 +1,7 @@ { "id": "a01fbce3-2793-461f-ab86-43680ccbae25", "snapshot": "libreoffice_calc", - "instruction": "I need to set the decimal separator as a comma (,) for localized data representation and clarity in visualization. Can you assist with this?", + "instruction": "I need to set the decimal separator as a comma (,) for localized data representation and clarity in visualization. Can you help me to update all the numbers in the sheet? Also please keep the decimal numbers as-is.", "source": "https://superuser.com/questions/1250677/how-to-set-decimal-separator-in-libre-office-calc", "config": [ { @@ -69,6 +69,12 @@ "/home/user/Set_Decimal_Separator_Dot.xlsx" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json b/evaluation_examples/examples/libreoffice_calc/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json index 8b50b36..c0cc65d 100644 --- a/evaluation_examples/examples/libreoffice_calc/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json +++ b/evaluation_examples/examples/libreoffice_calc/a9f325aa-8c05-4e4f-8341-9e4358565f4f.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/abed40dc-063f-4598-8ba5-9fe749c0615d.json b/evaluation_examples/examples/libreoffice_calc/abed40dc-063f-4598-8ba5-9fe749c0615d.json index e3d8cd1..b32cf0a 100644 --- a/evaluation_examples/examples/libreoffice_calc/abed40dc-063f-4598-8ba5-9fe749c0615d.json +++ b/evaluation_examples/examples/libreoffice_calc/abed40dc-063f-4598-8ba5-9fe749c0615d.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/d681960f-7bc3-4286-9913-a8812ba3261a.json b/evaluation_examples/examples/libreoffice_calc/d681960f-7bc3-4286-9913-a8812ba3261a.json index 111f16c..8fbecea 100644 --- a/evaluation_examples/examples/libreoffice_calc/d681960f-7bc3-4286-9913-a8812ba3261a.json +++ b/evaluation_examples/examples/libreoffice_calc/d681960f-7bc3-4286-9913-a8812ba3261a.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json b/evaluation_examples/examples/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json index d2a30ab..215ee46 100644 --- a/evaluation_examples/examples/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json +++ b/evaluation_examples/examples/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json b/evaluation_examples/examples/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json index 3ba14cf..b0afdc6 100644 --- a/evaluation_examples/examples/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json +++ b/evaluation_examples/examples/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json b/evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json index b522e4d..d9ff527 100644 --- a/evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json +++ b/evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json @@ -50,6 +50,12 @@ "import pyautogui; pyautogui.hotkey(\"ctrl\", \"s\");" ] } + }, + { + "type": "sleep", + "parameters": { + "seconds": 0.5 + } } ], "func": "compare_table", diff --git a/main.py b/main.py index c783c00..ce7ee30 100644 --- a/main.py +++ b/main.py @@ -47,11 +47,12 @@ def human_agent(): Runs the Gym environment with human input. """ - with open("evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0-win.json", "r") as f: + with open("evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json", "r") as f: example = json.load(f) - example["snapshot"] = "Snapshot 1" + example["snapshot"] = "Snapshot 30" - env = DesktopEnv( path_to_vm="~/vmware/Windows 10 x64/Windows 10 x64.vmx" + #env = DesktopEnv( path_to_vm="~/vmware/Windows 10 x64/Windows 10 x64.vmx" + env = DesktopEnv( path_to_vm="/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx" , action_space="computer_13" , task_config=example )