diff --git a/evaluation_examples/examples/chrome/0d8b7de3-e8de-4d86-b9fd-dd2dce58a217.json b/evaluation_examples/examples/chrome/0d8b7de3-e8de-4d86-b9fd-dd2dce58a217.json new file mode 100644 index 0000000..022fc54 --- /dev/null +++ b/evaluation_examples/examples/chrome/0d8b7de3-e8de-4d86-b9fd-dd2dce58a217.json @@ -0,0 +1,18 @@ +{ + "id": "0d8b7de3-e8de-4d86-b9fd-dd2dce58a217", + "snapshot": "chrome", + "instruction": "Browse the natural products database.", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/121ba48f-9e17-48ce-9bc6-a4fb17a7ebba.json b/evaluation_examples/examples/chrome/121ba48f-9e17-48ce-9bc6-a4fb17a7ebba.json new file mode 100644 index 0000000..69ea676 --- /dev/null +++ b/evaluation_examples/examples/chrome/121ba48f-9e17-48ce-9bc6-a4fb17a7ebba.json @@ -0,0 +1,18 @@ +{ + "id": "121ba48f-9e17-48ce-9bc6-a4fb17a7ebba", + "snapshot": "chrome", + "instruction": "Find Dota 2 game and add all DLC to cart.", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/1236ab8d-ad0c-4643-bdf1-9712a35e7e47.json b/evaluation_examples/examples/chrome/1236ab8d-ad0c-4643-bdf1-9712a35e7e47.json new file mode 100644 index 0000000..8951f9f --- /dev/null +++ b/evaluation_examples/examples/chrome/1236ab8d-ad0c-4643-bdf1-9712a35e7e47.json @@ -0,0 +1,18 @@ +{ + "id": "1236ab8d-ad0c-4643-bdf1-9712a35e7e47", + "snapshot": "chrome", + "instruction": "Show me jobs for MBA & Graduate Internships.", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git 
a/evaluation_examples/examples/chrome/368d9ba4-203c-40c1-9fa3-da2f1430ce63.json b/evaluation_examples/examples/chrome/368d9ba4-203c-40c1-9fa3-da2f1430ce63.json new file mode 100644 index 0000000..b696afa --- /dev/null +++ b/evaluation_examples/examples/chrome/368d9ba4-203c-40c1-9fa3-da2f1430ce63.json @@ -0,0 +1,18 @@ +{ + "id": "368d9ba4-203c-40c1-9fa3-da2f1430ce63", + "snapshot": "chrome", + "instruction": "find the Monthly forecast for Manchester, GB for January", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json b/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json new file mode 100644 index 0000000..173d49a --- /dev/null +++ b/evaluation_examples/examples/chrome/59155008-fe71-45ec-8a8f-dc35497b6aa8.json @@ -0,0 +1,18 @@ +{ + "id": "59155008-fe71-45ec-8a8f-dc35497b6aa8", + "snapshot": "chrome", + "instruction": "", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/7f80d204-6db3-4cc2-8098-9b64302c02a1.json b/evaluation_examples/examples/chrome/7f80d204-6db3-4cc2-8098-9b64302c02a1.json new file mode 100644 index 0000000..2435dd9 --- /dev/null +++ b/evaluation_examples/examples/chrome/7f80d204-6db3-4cc2-8098-9b64302c02a1.json @@ -0,0 +1,18 @@ +{ + "id": "7f80d204-6db3-4cc2-8098-9b64302c02a1", + "snapshot": "chrome", + "instruction": "Check drug interaction for melatonin and Folate Forte.", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/92615024-0cb5-409a-a61d-4932e22fdbf4.json 
b/evaluation_examples/examples/chrome/92615024-0cb5-409a-a61d-4932e22fdbf4.json new file mode 100644 index 0000000..79e7dcd --- /dev/null +++ b/evaluation_examples/examples/chrome/92615024-0cb5-409a-a61d-4932e22fdbf4.json @@ -0,0 +1,18 @@ +{ + "id": "92615024-0cb5-409a-a61d-4932e22fdbf4", + "snapshot": "chrome", + "instruction": "Find the drug interaction between gabapentin and ibuprofen", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/96249224-cb8e-4504-a661-7e88f9efe347.json b/evaluation_examples/examples/chrome/96249224-cb8e-4504-a661-7e88f9efe347.json new file mode 100644 index 0000000..2773938 --- /dev/null +++ b/evaluation_examples/examples/chrome/96249224-cb8e-4504-a661-7e88f9efe347.json @@ -0,0 +1,18 @@ +{ + "id": "96249224-cb8e-4504-a661-7e88f9efe347", + "snapshot": "chrome", + "instruction": "View the speakers that are bluetooth and wireless and filter the results to only show models that are on sale and cost less than $200.", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/99a48441-ead6-4271-8c07-dc03e870f507.json b/evaluation_examples/examples/chrome/99a48441-ead6-4271-8c07-dc03e870f507.json new file mode 100644 index 0000000..815f5f8 --- /dev/null +++ b/evaluation_examples/examples/chrome/99a48441-ead6-4271-8c07-dc03e870f507.json @@ -0,0 +1,18 @@ +{ + "id": "99a48441-ead6-4271-8c07-dc03e870f507", + "snapshot": "chrome", + "instruction": "Look for a White PlayStation 5 Console and save it", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git 
a/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json b/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json new file mode 100644 index 0000000..e5cb31f --- /dev/null +++ b/evaluation_examples/examples/chrome/9f935cce-0a9f-435f-8007-817732bfc0a5.json @@ -0,0 +1,18 @@ +{ + "id": "9f935cce-0a9f-435f-8007-817732bfc0a5", + "snapshot": "chrome", + "instruction": "Browse list of Civil Division forms.", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json b/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json new file mode 100644 index 0000000..95d5b9e --- /dev/null +++ b/evaluation_examples/examples/chrome/a728a36e-8bf1-4bb6-9a03-ef039a5233f0.json @@ -0,0 +1,18 @@ +{ + "id": "a728a36e-8bf1-4bb6-9a03-ef039a5233f0", + "snapshot": "chrome", + "instruction": "Find the Driver License Eligibility Requirements", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/b070486d-e161-459b-aa2b-ef442d973b92.json b/evaluation_examples/examples/chrome/b070486d-e161-459b-aa2b-ef442d973b92.json new file mode 100644 index 0000000..efa4628 --- /dev/null +++ b/evaluation_examples/examples/chrome/b070486d-e161-459b-aa2b-ef442d973b92.json @@ -0,0 +1,18 @@ +{ + "id": "b070486d-e161-459b-aa2b-ef442d973b92", + "snapshot": "chrome", + "instruction": "Show side effects of Tamiflu.", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git 
a/evaluation_examples/examples/chrome/b538b896-8e4a-49b6-8d4a-188d5cc2c2ac.json b/evaluation_examples/examples/chrome/b538b896-8e4a-49b6-8d4a-188d5cc2c2ac.json new file mode 100644 index 0000000..7628cde --- /dev/null +++ b/evaluation_examples/examples/chrome/b538b896-8e4a-49b6-8d4a-188d5cc2c2ac.json @@ -0,0 +1,18 @@ +{ + "id": "b538b896-8e4a-49b6-8d4a-188d5cc2c2ac", + "snapshot": "chrome", + "instruction": "Find symptoms of sleep apnea", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/cabb3bae-cccb-41bd-9f5d-0f3a9fecd825.json b/evaluation_examples/examples/chrome/cabb3bae-cccb-41bd-9f5d-0f3a9fecd825.json new file mode 100644 index 0000000..611fba0 --- /dev/null +++ b/evaluation_examples/examples/chrome/cabb3bae-cccb-41bd-9f5d-0f3a9fecd825.json @@ -0,0 +1,18 @@ +{ + "id": "cabb3bae-cccb-41bd-9f5d-0f3a9fecd825", + "snapshot": "chrome", + "instruction": "Browse spider-man toys for kids and sort by lowest price.", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json b/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json new file mode 100644 index 0000000..f912c1e --- /dev/null +++ b/evaluation_examples/examples/chrome/f0b971a1-6831-4b9b-a50e-22a6e47f45ba.json @@ -0,0 +1,18 @@ +{ + "id": "f0b971a1-6831-4b9b-a50e-22a6e47f45ba", + "snapshot": "chrome", + "instruction": "Show me the scores for the 2019 super bowl", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git 
a/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json b/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json new file mode 100644 index 0000000..d94cd6f --- /dev/null +++ b/evaluation_examples/examples/chrome/f5d96daf-83a8-4c86-9686-bada31fc66ab.json @@ -0,0 +1,18 @@ +{ + "id": "f5d96daf-83a8-4c86-9686-bada31fc66ab", + "snapshot": "chrome", + "instruction": "Compare iPhone 15 Pro Max with iPhone 13 Pro Max", + "source": "Mind2Web", + "config": [], + "trajectory": "trajectories/", + "related_apps": [ + "chrome" + ], + "evaluator": { + "func": "", + "result": { + }, + "expected": { + } + } +} diff --git a/mm_agents/prompts.py b/mm_agents/prompts.py index e23a211..15aefeb 100644 --- a/mm_agents/prompts.py +++ b/mm_agents/prompts.py @@ -4,7 +4,7 @@ You have good knowledge of computer and good internet connection and assume your For each step, you will get an observation of an image, which is the screenshot of the computer screen and you will predict the action of the computer based on the image. You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot. -Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take +Return one line or multiple lines of python code to perform the action each time, be time efficient. 
When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take; Each time you need to predict a complete code, no variables or function can be shared from history You need to to specify the coordinates of by yourself based on your observation of current observation, but you should be careful to ensure that the coordinates are correct. You ONLY need to return the code inside a code block, like this: ```python @@ -270,7 +270,7 @@ You have good knowledge of computer and good internet connection and assume your For each step, you will get an observation of the desktop by accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the accessibility tree. You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot. -Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take +Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take; Each time you need to predict a complete code, no variables or function can be shared from history You need to to specify the coordinates of by yourself based on your observation of current observation, but you should be careful to ensure that the coordinates are correct. 
You ONLY need to return the code inside a code block, like this: ```python @@ -537,7 +537,7 @@ For each step, you will get an observation of the desktop by 1) a screenshot; an And you will predict the action of the computer based on the screenshot and accessibility tree. You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot. -Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take +Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take; Each time you need to predict a complete code, no variables or function can be shared from history You need to to specify the coordinates of by yourself based on your observation of current observation, but you should be careful to ensure that the coordinates are correct. You ONLY need to return the code inside a code block, like this: ```python @@ -812,7 +812,7 @@ pyautogui.dragTo(tag_1, button='left') ``` When you think you can directly output precise x and y coordinates or there is no tag on which you want to interact, you can also use them directly. But you should be careful to ensure that the coordinates are correct. -Return one line or multiple lines of python code to perform the action each time, be time efficient. 
When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take +Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take; Each time you need to predict a complete code, no variables or function can be shared from history You need to to specify the coordinates of by yourself based on your observation of current observation, but you should be careful to ensure that the coordinates are correct. You ONLY need to return the code inside a code block, like this: ```python @@ -862,7 +862,7 @@ pyautogui.dragTo(tag_1, button='left') ``` When you think you can directly output precise x and y coordinates or there is no tag on which you want to interact, you can also use them directly. But you should be careful to ensure that the coordinates are correct. -Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take +Return one line or multiple lines of python code to perform the action each time, be time efficient. When predicting multiple lines of code, make some small sleep like `time.sleep(0.5);` interval so that the machine could take; Each time you need to predict a complete code, no variables or function can be shared from history You need to to specify the coordinates of by yourself based on your observation of current observation, but you should be careful to ensure that the coordinates are correct. You ONLY need to return the code inside a code block, like this: ```python