feat: Add proxy configuration to all 369 evaluation examples - 55 with proxy, 314 without

This commit is contained in:
Timothyxxx
2025-06-05 18:46:53 +08:00
parent 3b1540ed23
commit fb7bafb885
371 changed files with 2901 additions and 2028 deletions

View File

@@ -39,5 +39,6 @@
"expected": "true"
}
}
}
}
},
"proxy": false
}

View File

@@ -62,5 +62,6 @@
]
}
}
}
}
},
"proxy": true
}

View File

@@ -53,32 +53,37 @@
"chrome"
],
"evaluator": {
"func": ["is_expected_active_tab", "is_expected_active_tab"],
"func": [
"is_expected_active_tab",
"is_expected_active_tab"
],
"conj": "or",
"result": [
{
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
},
{
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
"goto_prefix": "https://www."
}
],
"expected": [
{
"type": "rule",
"rules": {
"type": "url",
"url": "https://www.drugs.com/npc/"
}
"type": "rule",
"rules": {
"type": "url",
"url": "https://www.drugs.com/npc/"
}
},
{
"type": "rule",
"rules": {
"type": "url",
"url": "https://www.drugs.com/npp/"
}
}
}]
}
}
]
},
"proxy": true
}

View File

@@ -3,26 +3,27 @@
"snapshot": "chrome",
"instruction": "Computer, please navigate to the area in my browser settings where my passwords are stored. I want to check my login information for Etsy without revealing it just yet.",
"source": "https://www.quora.com/What-are-the-cool-tricks-to-use-Google-Chrome",
"config": [
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
}],
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
@@ -37,8 +38,9 @@
"type": "rule",
"rules": {
"type": "url",
"url":"chrome://password-manager/passwords"
"url": "chrome://password-manager/passwords"
}
}
}
}
},
"proxy": false
}

View File

@@ -63,9 +63,10 @@
"type": "rule",
"rules": {
"items": [
"The Dota 2 Official Soundtrack"
"The Dota 2 Official Soundtrack"
]
}
}
}
}
},
"proxy": true
}

View File

@@ -1,87 +1,99 @@
{
"id": "1704f00f-79e6-43a7-961b-cedd3724d5fd",
"snapshot": "chrome",
"instruction": "Find a large car with lowest price from next Monday to next Friday in Zurich.",
"source": "test_task_0",
"config": [
"id": "1704f00f-79e6-43a7-961b-cedd3724d5fd",
"snapshot": "chrome",
"instruction": "Find a large car with lowest price from next Monday to next Friday in Zurich.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.rentalcars.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": [
"check_direct_json_object",
"check_direct_json_object"
],
"result": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": [
"locationName",
"dropLocationName",
"filterCriteria_carCategory",
"filterCriteria_sortBy"
]
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.rentalcars.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "xpath",
"xpathObject": {
"/html/body/main/div/div/div/section/div/div/div/div[1]/div[1]/p": "from",
"/html/body/main/div/div/div/section/div/div/div/div[1]/div[3]/p": "to"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":["check_direct_json_object", "check_direct_json_object"],
"result": [{
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": ["locationName", "dropLocationName", "filterCriteria_carCategory", "filterCriteria_sortBy"]
},
{
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "xpath",
"xpathObject":{
"/html/body/main/div/div/div/section/div/div/div/div[1]/div[1]/p": "from",
"/html/body/main/div/div/div/section/div/div/div/div[1]/div[3]/p": "to"
"expected": [
{
"type": "rule",
"rules": {
"expected": {
"locationName": "Zürich",
"dropLocationName": "Zürich",
"filterCriteria_carCategory": "large",
"filterCriteria_sortBy": "PRICE"
}
}],
"expected":[{
"type": "rule",
"rules":{
"expected": {
"locationName": "Zürich",
"dropLocationName": "Zürich",
"filterCriteria_carCategory": "large",
"filterCriteria_sortBy": "PRICE"
}
}
},
{
"type": "rule_relativeTime",
"rules":{
"relativeTime":{
"from":"next Monday",
"to":"next Friday"
},
"expected": {
"from": "{DoW}, {DayD} {Month} {Year}, 10:00",
"to": "{DoW}, {DayD} {Month} {Year}, 10:00"
}
},
{
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "next Monday",
"to": "next Friday"
},
"expected": {
"from": "{DoW}, {DayD} {Month} {Year}, 10:00",
"to": "{DoW}, {DayD} {Month} {Year}, 10:00"
}
}}
]
}
}
}
}
]
},
"proxy": true
}

View File

@@ -1,63 +1,62 @@
{
"id": "2888b4e6-5b47-4b57-8bf5-c73827890774",
"snapshot": "chrome",
"instruction": "Find a men's T-Shirt that is in large size with a stripe pattern, short sleeve and under the Sales&Discount.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.macys.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
"id": "2888b4e6-5b47-4b57-8bf5-c73827890774",
"snapshot": "chrome",
"instruction": "Find a men's T-Shirt that is in large size with a stripe pattern, short sleeve and under the Sales&Discount.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.macys.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"exact_match",
"result": {
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -1,
"needDeleteId": true,
"returnType": "string"
},
"expected":{
"type": "rule",
"rules":{
"expected": "Stripe,Men,L,Short%20Sleeve,Sales%20%26%20Discounts"
}
}
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "exact_match",
"result": {
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -1,
"needDeleteId": true,
"returnType": "string"
},
"expected": {
"type": "rule",
"rules": {
"expected": "Stripe,Men,L,Short%20Sleeve,Sales%20%26%20Discounts"
}
}
},
"proxy": true
}

View File

@@ -37,8 +37,11 @@
"type": "rule",
"rules": {
"type": "bookmark_bar_folders_names",
"names": ["Favorites"]
"names": [
"Favorites"
]
}
}
}
}
},
"proxy": false
}

View File

@@ -59,5 +59,6 @@
"expected": "Thomas"
}
}
}
}
},
"proxy": false
}

View File

@@ -53,5 +53,6 @@
"expected": "true"
}
}
}
}
},
"proxy": false
}

View File

@@ -48,5 +48,6 @@
"name": "Play Puzzle Game 2048"
}
}
}
}
},
"proxy": true
}

View File

@@ -1,78 +1,87 @@
{
"id": "368d9ba4-203c-40c1-9fa3-da2f1430ce63",
"snapshot": "chrome",
"instruction": "find the Monthly forecast for Manchester, GB for this month",
"source": "test_task_1",
"config": [
"id": "368d9ba4-203c-40c1-9fa3-da2f1430ce63",
"snapshot": "chrome",
"instruction": "find the Monthly forecast for Manchester, GB for this month",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.accuweather.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": [
"check_direct_json_object",
"is_expected_url_pattern_match"
],
"result": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -2,
"needDeleteId": false,
"returnType": "json",
"key": "time"
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.accuweather.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":["check_direct_json_object", "is_expected_url_pattern_match"],
"result": [{
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -2,
"needDeleteId": false,
"returnType": "json",
"key":"time"
},
{
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
}],
"expected":[
{
"type": "rule_relativeTime",
"rules":{
"relativeTime": {
"from": "this month"
},
"expected": {
"time": "{month}-weather"
}
}
},
{
"type": "rule",
"rules":{
"expected": ["\/manchester\/"]
}
}]
}
}
"expected": [
{
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "this month"
},
"expected": {
"time": "{month}-weather"
}
}
},
{
"type": "rule",
"rules": {
"expected": [
"/manchester/"
]
}
}
]
},
"proxy": true
}

View File

@@ -3,13 +3,13 @@
"snapshot": "chrome",
"instruction": "I am more familiar with Korean as I am from Korea. I want to use chrome with my mother tongue. Could you help me change the Chrome interface language to Korean? ",
"source": "https://superuser.com/questions/984668/change-interface-language-of-chrome-to-english",
"config": [
],
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "infeasible"
}
}
},
"proxy": false
}

View File

@@ -270,5 +270,6 @@
]
}
}
}
}
},
"proxy": false
}

View File

@@ -1,111 +1,117 @@
{
"id": "47543840-672a-467d-80df-8f7c3b9788c9",
"snapshot": "chrome",
"instruction": "Find and select the car with the most number of seats to pick up in Boston Logan Intl Airport from 10th next month to 11th next month.",
"source": "test_task_1",
"config": [
"id": "47543840-672a-467d-80df-8f7c3b9788c9",
"snapshot": "chrome",
"instruction": "Find and select the car with the most number of seats to pick up in Boston Logan Intl Airport from 10th next month to 11th next month.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.budget.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": [
"is_expected_url_pattern_match",
"check_direct_json_object",
"check_direct_json_object"
],
"conj": "and",
"result": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
},
{
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "class",
"class_singleObject": {},
"class_multiObject": {
"location-info": {
"0": "start_location",
"1": "end_location"
},
"day-time-info": {
"0": "from",
"1": "to"
}
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.budget.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "xpath",
"xpathObject": {
"/html/body/div[6]/div[2]/div[1]/div/div/div[2]/div[1]/section[1]/div/form/div[1]/div[2]/div/a": "rank"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":["is_expected_url_pattern_match", "check_direct_json_object", "check_direct_json_object"],
"conj": "and",
"result": [
{
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
},
{
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "class",
"class_singleObject":{},
"class_multiObject":{
"location-info":{
"0": "start_location",
"1": "end_location"
},
"day-time-info":{
"0": "from",
"1": "to"
}
}
},
{
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "xpath",
"xpathObject":{
"/html/body/div[6]/div[2]/div[1]/div/div/div[2]/div[1]/section[1]/div/form/div[1]/div[2]/div/a": "rank"
}
}
],
"expected":[
{
"type": "rule",
"rules":{
"expected": ["reservation#\/vehicles"]
}
},
{
"type": "rule_relativeTime",
"rules":{
"relativeTime":{
"from":"10th next month",
"to": "11th next month"
},
"expected": {
"start_location": "Boston Logan Intl Airport,\n\t\t\t\t\t\t\t\tBOS \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t Pick-Up",
"end_location": "Boston Logan Intl Airport,\n\t\t\t\t\t\t\t\tBOS",
"from": "{DoW}, {Month} {Day0D}, 12:00 PM",
"to": "{DoW}, {Month} {Day0D}, 12:00 PM"
}
}
},
{
"type": "rule",
"rules":{
"expected": {
"rank": "Number of Seats (High to Low)"
}
}
}
]
}
}
"expected": [
{
"type": "rule",
"rules": {
"expected": [
"reservation#/vehicles"
]
}
},
{
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "10th next month",
"to": "11th next month"
},
"expected": {
"start_location": "Boston Logan Intl Airport,\n\t\t\t\t\t\t\t\tBOS \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t\t\n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t \n\t\t\t\t\t\t\t Pick-Up",
"end_location": "Boston Logan Intl Airport,\n\t\t\t\t\t\t\t\tBOS",
"from": "{DoW}, {Month} {Day0D}, 12:00 PM",
"to": "{DoW}, {Month} {Day0D}, 12:00 PM"
}
}
},
{
"type": "rule",
"rules": {
"expected": {
"rank": "Number of Seats (High to Low)"
}
}
}
]
},
"proxy": true
}

View File

@@ -63,5 +63,6 @@
]
}
}
}
}
},
"proxy": false
}

View File

@@ -65,5 +65,6 @@
"url": "https://www.babycenter.com/baby-names/details/carl-853"
}
}
}
}
},
"proxy": true
}

View File

@@ -57,5 +57,6 @@
"expected": "/home/user/Desktop/helloExtension"
}
}
}
},
"proxy": false
}

View File

@@ -1,78 +1,78 @@
{
"id": "6c4c23a1-42a4-43cc-9db1-2f86ff3738cc",
"snapshot": "chrome",
"instruction": "Find flights from Seattle to New York on 5th next month and only show those that can be purchased with miles.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
"id": "6c4c23a1-42a4-43cc-9db1-2f86ff3738cc",
"snapshot": "chrome",
"instruction": "Find flights from Seattle to New York on 5th next month and only show those that can be purchased with miles.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.delta.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "check_direct_json_object",
"result": {
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "class",
"class_singleObject": {
"search-date": "time",
"price-in-tabs__nav--selected": "category"
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.delta.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"class_multiObject": {
"search-segment-cities__city": {
"0": "start",
"1": "end"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"check_direct_json_object",
"result": {
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "class",
"class_singleObject":{
"search-date": "time",
"price-in-tabs__nav--selected": "category"
},
"class_multiObject":{
"search-segment-cities__city": {
"0": "start",
"1": "end"
}
}
},
"expected": {
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "5th next month"
},
"expected":{
"type": "rule_relativeTime",
"rules":{
"relativeTime": {
"from": "5th next month"
},
"expected": {
"start": "SEA",
"end": "NYC",
"time": "{DoW}, {Month} {DayD}, {Year}",
"category": "Miles"
}
}
"expected": {
"start": "SEA",
"end": "NYC",
"time": "{DoW}, {Month} {DayD}, {Year}",
"category": "Miles"
}
}
}
}
},
"proxy": true
}

View File

@@ -46,8 +46,11 @@
"type": "rule",
"rules": {
"type": "bookmark_bar_websites_urls",
"urls": ["https://jalammar.github.io/illustrated-transformer/"]
"urls": [
"https://jalammar.github.io/illustrated-transformer/"
]
}
}
}
}
},
"proxy": true
}

View File

@@ -47,8 +47,11 @@
"type": "rule",
"rules": {
"type": "domains",
"domains": [".amazon.com"]
"domains": [
".amazon.com"
]
}
}
}
}
},
"proxy": true
}

View File

@@ -1,63 +1,66 @@
{
"id": "7f52cab9-535c-4835-ac8c-391ee64dc930",
"snapshot": "chrome",
"instruction": "Create a list of drip coffee makers that are on sale and within $25-60 and have a black finish.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://shopping.google.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"id": "7f52cab9-535c-4835-ac8c-391ee64dc930",
"snapshot": "chrome",
"instruction": "Create a list of drip coffee makers that are on sale and within $25-60 and have a black finish.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://shopping.google.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "check_direct_json_object",
"result": {
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": [
"q",
"tbs"
]
},
"expected": {
"type": "rule",
"rules": {
"expected": {
"q": "drip coffee maker",
"tbs": "mr:1,price:1,ppr_min:25,ppr_max:60,sales:1,pdtr0:1825161|1825162"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"check_direct_json_object",
"result": {
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": ["q", "tbs"]
},
"expected":{
"type": "rule",
"rules":{
"expected": {
"q": "drip coffee maker",
"tbs": "mr:1,price:1,ppr_min:25,ppr_max:60,sales:1,pdtr0:1825161|1825162"
}
}
}
}
}
},
"proxy": true
}

View File

@@ -1,65 +1,70 @@
{
"id": "82279c77-8fc6-46f6-9622-3ba96f61b477",
"snapshot": "chrome",
"instruction": "Find electric cars with a maximum price of $50,000 within 50 miles of 10001.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.cars.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"id": "82279c77-8fc6-46f6-9622-3ba96f61b477",
"snapshot": "chrome",
"instruction": "Find electric cars with a maximum price of $50,000 within 50 miles of 10001.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.cars.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "check_direct_json_object",
"result": {
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": [
"list_price_max",
"maximum_distance",
"zip",
"fuel_slugs[]"
]
},
"expected": {
"type": "rule",
"rules": {
"expected": {
"list_price_max": "50000",
"maximum_distance": "50",
"zip": "10001",
"fuel_slugs[]": "electric"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"check_direct_json_object",
"result": {
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": ["list_price_max", "maximum_distance", "zip","fuel_slugs[]"]
},
"expected":{
"type": "rule",
"rules":{
"expected": {
"list_price_max": "50000",
"maximum_distance": "50",
"zip":"10001",
"fuel_slugs[]":"electric"
}
}
}
}
}
},
"proxy": true
}

View File

@@ -1,69 +1,74 @@
{
"id": "82bc8d6a-36eb-4d2d-8801-ef714fb1e55a",
"snapshot": "chrome",
"instruction": "On next Monday, look up a flight from Mumbai to Stockholm.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.qatarairways.com/en-hk/homepage.html"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"id": "82bc8d6a-36eb-4d2d-8801-ef714fb1e55a",
"snapshot": "chrome",
"instruction": "On next Monday, look up a flight from Mumbai to Stockholm.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.qatarairways.com/en-hk/homepage.html"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "check_direct_json_object",
"result": {
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": [
"fromStation",
"toStation",
"departing"
],
"replace": {
"departing": "time"
}
},
"expected": {
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "next Monday"
},
"expected": {
"fromStation": "BOM",
"toStation": "STO",
"time": "{Year}-{Month0D}-{Day0D}"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"check_direct_json_object",
"result": {
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": ["fromStation", "toStation", "departing"],
"replace":{
"departing": "time"
}
},
"expected":{
"type": "rule_relativeTime",
"rules":{
"relativeTime":{
"from": "next Monday"
},
"expected": {
"fromStation": "BOM",
"toStation": "STO",
"time": "{Year}-{Month0D}-{Day0D}"
}
}
}
}
}
},
"proxy": true
}

View File

@@ -3,13 +3,13 @@
"snapshot": "chrome",
"instruction": "Could you assist me in turning off the dark mode feature in Google Chrome? I've noticed that while dark mode is great for reducing glare, it actually makes it more challenging for me to read text clearly, especially with my astigmatism.",
"source": "https://superuser.com/questions/1417973/how-to-disable-google-chrome-dark-mode",
"config": [
],
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "infeasible"
}
}
},
"proxy": false
}

View File

@@ -48,5 +48,6 @@
"expected": "true"
}
}
}
}
},
"proxy": false
}

View File

@@ -29,13 +29,15 @@
"chrome"
],
"evaluator": {
"postconfig":[{
"type": "execute",
"parameters": {
"command": "pkill chrome",
"shell": "true"
"postconfig": [
{
"type": "execute",
"parameters": {
"command": "pkill chrome",
"shell": "true"
}
}
}],
],
"func": "exact_match",
"result": {
"type": "data_delete_automacally"
@@ -46,5 +48,6 @@
"expected": "true"
}
}
}
}
},
"proxy": false
}

View File

@@ -1,78 +1,83 @@
{
"id": "9f3f70fc-5afc-4958-a7b7-3bb4fcb01805",
"snapshot": "chrome",
"instruction": "Browse the list of women's Nike jerseys over $60.",
"source": "test_task_1",
"config": [
"id": "9f3f70fc-5afc-4958-a7b7-3bb4fcb01805",
"snapshot": "chrome",
"instruction": "Browse the list of women's Nike jerseys over $60.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.nba.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": [
"is_expected_url_pattern_match",
"check_direct_json_object"
],
"conj": "and",
"result": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
"type": "active_tab_info"
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.nba.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"type": "active_tab_html_parse",
"category": "xpath",
"xpathObject": {
"/html/body/div[2]/div/div[6]/div[2]/div[2]/div/div[1]/div[4]/ul/li[2]": "money"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":["is_expected_url_pattern_match", "check_direct_json_object"],
"conj": "and",
"result": [
{
"type": "active_tab_info"
},
{
"type": "active_tab_html_parse",
"category": "xpath",
"xpathObject":{
"/html/body/div[2]/div/div[6]/div[2]/div[2]/div/div[1]/div[4]/ul/li[2]": "money"
}
}
],
"expected":[
{
"type": "rule",
"rules":{
"expected": ["\/women-jerseys\/"]
}
},
{
"type": "rule",
"rules":{
"expected": {
"money": "over $60"
}
}
}
]
}
}
"expected": [
{
"type": "rule",
"rules": {
"expected": [
"/women-jerseys/"
]
}
},
{
"type": "rule",
"rules": {
"expected": {
"money": "over $60"
}
}
}
]
},
"proxy": true
}

View File

@@ -56,5 +56,6 @@
]
}
}
}
},
"proxy": true
}

View File

@@ -65,5 +65,6 @@
"url": "https://www.dmv.virginia.gov/licenses-ids/license/applying/eligibility"
}
}
}
}
},
"proxy": true
}

View File

@@ -1,69 +1,70 @@
{
"id": "a96b564e-dbe9-42c3-9ccf-b4498073438a",
"snapshot": "chrome",
"instruction": "Find discussions of community and open one with most replies.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.flightaware.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
]
}
"id": "a96b564e-dbe9-42c3-9ccf-b4498073438a",
"snapshot": "chrome",
"instruction": "Find discussions of community and open one with most replies.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"is_expected_active_tab",
"result": {
"type": "active_tab_info",
"goto_prefix": "https://www."
},
"expected":{
"type": "rule",
"rules":{
"type": "url",
"url": "https://discussions.flightaware.com/t/the-banter-thread/4412"
}
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.flightaware.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "is_expected_active_tab",
"result": {
"type": "active_tab_info",
"goto_prefix": "https://www."
},
"expected": {
"type": "rule",
"rules": {
"type": "url",
"url": "https://discussions.flightaware.com/t/the-banter-thread/4412"
}
}
},
"proxy": true
}

View File

@@ -3,13 +3,13 @@
"snapshot": "chrome",
"instruction": "Could you please change the number of search results displayed on one page to 50? I find that having more results visible at once significantly enhances my research efficiency, as it reduces the need to constantly click through multiple pages. ",
"source": "https://support.google.com/chrome/thread/219988391/increase-search-results-per-page?hl=en",
"config": [
],
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "infeasible"
}
}
},
"proxy": false
}

View File

@@ -61,5 +61,6 @@
"max": 99999
}
}
}
}
},
"proxy": false
}

View File

@@ -43,24 +43,28 @@
"chrome"
],
"evaluator": {
"func": ["exact_match", "exact_match"],
"conj": "or",
"result": [
{
"func": [
"exact_match",
"exact_match"
],
"conj": "or",
"result": [
{
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -1,
"needDeleteId": false,
"returnType": "string"
},
{
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -1,
"needDeleteId": false,
"returnType": "string"
}],
"expected": [
},
{
"type": "url_dashPart",
"goto_prefix": "https://www.",
"partIndex": -1,
"needDeleteId": false,
"returnType": "string"
}
],
"expected": [
{
"type": "rule",
"rules": {
@@ -72,6 +76,8 @@
"rules": {
"expected": "tamiflu-side-effects.html"
}
}]
}
}
]
},
"proxy": true
}

View File

@@ -1,67 +1,66 @@
{
"id": "b4f95342-463e-4179-8c3f-193cd7241fb2",
"snapshot": "chrome",
"instruction": "Find the next available date for Albion Basin.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.recreation.gov/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"id": "b4f95342-463e-4179-8c3f-193cd7241fb2",
"snapshot": "chrome",
"instruction": "Find the next available date for Albion Basin.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.recreation.gov/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "check_direct_json_object",
"result": {
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "class",
"class_singleObject": {},
"class_multiObject": {
"camp-sortable-column-header": {
"2": "camp-sortable-column-header"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"check_direct_json_object",
"result": {
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category":"class",
"class_singleObject":{},
"class_multiObject":{
"camp-sortable-column-header":{
"2":"camp-sortable-column-header"
}
}
},
"expected":{
"type":"gotoRecreationPage_and_get_html_content",
"selector": "class",
"class": "camp-sortable-column-header",
"order": "2"
}
},
"expected": {
"type": "gotoRecreationPage_and_get_html_content",
"selector": "class",
"class": "camp-sortable-column-header",
"order": "2"
}
}
},
"proxy": true
}

View File

@@ -1,78 +1,77 @@
{
"id": "b7895e80-f4d1-4648-bee0-4eb45a6f1fa8",
"snapshot": "chrome",
"instruction": "Find a Hotel in New York City with lowest price possible for 2 adults this weekend.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.tripadvisor.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"id": "b7895e80-f4d1-4648-bee0-4eb45a6f1fa8",
"snapshot": "chrome",
"instruction": "Find a Hotel in New York City with lowest price possible for 2 adults this weekend.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.tripadvisor.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "check_direct_json_object",
"result": {
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "xpath",
"xpathObject": {
"/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[1]/div/button/div[3]": "from",
"/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[2]/button/div[3]": "to",
"/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[1]/div/h1": "city",
"/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[3]/button/div[3]/span/span[2]": "adult",
"/html/body/div[1]/main/div[3]/div/div[2]/div/div[1]/div/div[2]/div[1]/div/div[1]/div/div[1]/div[2]/div/div[2]/div/button/div/div": "rank"
}
},
"expected": {
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "this Saturday",
"to": "this Sunday"
},
"expected": {
"from": "{DoW}, {Month} {Day0D}",
"to": "{DoW}, {Month} {Day0D}",
"city": "New York City Hotels",
"adult": "2 adults",
"rank": "Price (low to high)"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"check_direct_json_object",
"result": {
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "xpath",
"xpathObject":{
"/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[1]/div/button/div[3]":"from",
"/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[2]/button/div[3]":"to",
"/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[1]/div/h1":"city",
"/html/body/div[1]/main/div[3]/div/div[1]/div[2]/div[1]/div[2]/div/div/div/div/div[3]/button/div[3]/span/span[2]":"adult",
"/html/body/div[1]/main/div[3]/div/div[2]/div/div[1]/div/div[2]/div[1]/div/div[1]/div/div[1]/div[2]/div/div[2]/div/button/div/div":"rank"
}
},
"expected":
{
"type": "rule_relativeTime",
"rules":{
"relativeTime": {
"from": "this Saturday",
"to": "this Sunday"
},
"expected": {
"from": "{DoW}, {Month} {Day0D}",
"to": "{DoW}, {Month} {Day0D}",
"city": "New York City Hotels",
"adult": "2 adults",
"rank": "Price (low to high)"
}
}
}
}
}
},
"proxy": true
}

View File

@@ -36,8 +36,12 @@
"expected": {
"type": "rule",
"rules": {
"expected": ["Microsoft Bing", "Bing"]
"expected": [
"Microsoft Bing",
"Bing"
]
}
}
}
}
},
"proxy": false
}

View File

@@ -1,69 +1,71 @@
{
"id": "c1fa57f3-c3db-4596-8f09-020701085416",
"snapshot": "chrome",
"instruction": "Open the baggage fee calculator.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.united.com/en/us"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
]
}
"id": "c1fa57f3-c3db-4596-8f09-020701085416",
"snapshot": "chrome",
"instruction": "Open the baggage fee calculator.",
"source": "test_task_1",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.united.com/en/us"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('alt', 'f10'); time.sleep(0.5);"
]
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"is_expected_url_pattern_match",
"result": {
"type": "active_tab_info",
"goto_prefix": "https://www."
},
"expected":{
"type": "rule",
"rules":{
"expected": ["checked-bag-fee-calculator"]
}
}
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "is_expected_url_pattern_match",
"result": {
"type": "active_tab_info",
"goto_prefix": "https://www."
},
"expected": {
"type": "rule",
"rules": {
"expected": [
"checked-bag-fee-calculator"
]
}
}
},
"proxy": true
}

View File

@@ -43,20 +43,21 @@
"chrome"
],
"evaluator": {
"func": "is_expected_url_pattern_match",
"result": {
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
},
"expected": {
"type": "rule",
"rules": {
"expected": [
"AgeAppropriate:Kids",
"search=spider[-%20]?man%20toys",
"S=4"
]
}
"func": "is_expected_url_pattern_match",
"result": {
"type": "active_url_from_accessTree",
"goto_prefix": "https://www."
},
"expected": {
"type": "rule",
"rules": {
"expected": [
"AgeAppropriate:Kids",
"search=spider[-%20]?man%20toys",
"S=4"
]
}
}
}
},
"proxy": true
}

View File

@@ -1,102 +1,109 @@
{
"id": "da46d875-6b82-4681-9284-653b0c7ae241",
"snapshot": "chrome",
"instruction": "Schedule an appointment to apply for transportation access pass in the Charlie Card store on the first Monday four months later, 10:15 am, fill in my details (James Smith, james.smith@gmail.com). And don not click \"book\" directly. Let me review it.",
"source": "test_task_2",
"config": [
"id": "da46d875-6b82-4681-9284-653b0c7ae241",
"snapshot": "chrome",
"instruction": "Schedule an appointment to apply for transportation access pass in the Charlie Card store on the first Monday four months later, 10:15 am, fill in my details (James Smith, james.smith@gmail.com). And do not click \"book\" directly. Let me review it.",
"source": "test_task_2",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.mbta.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": [
"is_expected_url_pattern_match",
"check_direct_json_object",
"check_direct_json_object"
],
"conj": "and",
"result": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
"type": "active_tab_info"
},
{
"type": "active_tab_html_parse",
"category": "class",
"class_singleObject": {},
"class_multiObject": {
"breakword": {
"1": "content",
"2": "time"
}
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.mbta.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"type": "active_tab_html_parse",
"category": "input",
"inputObject": {
"/html/body/div/div/form/div[7]/div/div/div[1]/input[1]": "name",
"/html/body/div/div/form/div[7]/div/div/div[1]/input[2]": "mail"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":["is_expected_url_pattern_match", "check_direct_json_object", "check_direct_json_object"],
"conj": "and",
"result": [
{
"type": "active_tab_info"
},
{
"type": "active_tab_html_parse",
"category": "class",
"class_singleObject":{},
"class_multiObject":{
"breakword":{
"1": "content",
"2": "time"
}
}
},
{
"type": "active_tab_html_parse",
"category": "input",
"inputObject":{
"/html/body/div/div/form/div[7]/div/div/div[1]/input[1]": "name",
"/html/body/div/div/form/div[7]/div/div/div[1]/input[2]": "mail"
}
}
],
"expected":[
{
"type": "rule",
"rules":{
"expected": ["CharlieCardStoreAppointments@mbta.com\/bookings\/"]
}
},
{
"type": "rule_relativeTime",
"rules":{
"relativeTime":{
"from":"first monday four months later"
},
"expected": {
"content": "Apply for Transportation Access Pass (TAP) CharlieCard non-auto approval",
"time": "{MonthFull} {Day0D}, 10:15 am"
}
}
},
{
"type": "rule",
"rules":{
"expected": {
"name": "James Smith",
"mail": "james.smith@gmail.com"
}
}
}
]
}
}
"expected": [
{
"type": "rule",
"rules": {
"expected": [
"CharlieCardStoreAppointments@mbta.com/bookings/"
]
}
},
{
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "first monday four months later"
},
"expected": {
"content": "Apply for Transportation Access Pass (TAP) CharlieCard non-auto approval",
"time": "{MonthFull} {Day0D}, 10:15 am"
}
}
},
{
"type": "rule",
"rules": {
"expected": {
"name": "James Smith",
"mail": "james.smith@gmail.com"
}
}
}
]
},
"proxy": true
}

View File

@@ -48,5 +48,6 @@
"path": "https://lilianweng.github.io/posts/2023-06-23-agent/",
"dest": "LLM Powered Autonomous Agents _ Lil'Log_gold.pdf"
}
}
}
},
"proxy": true
}

View File

@@ -65,5 +65,6 @@
"url": "https://www.nfl.com/scores/2019/POST4"
}
}
}
}
},
"proxy": true
}

View File

@@ -1,59 +1,61 @@
{
"id": "f3b19d1e-2d48-44e9-b4e1-defcae1a0197",
"snapshot": "chrome",
"instruction": "Find help page about buying tickets.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://seatgeek.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
"id": "f3b19d1e-2d48-44e9-b4e1-defcae1a0197",
"snapshot": "chrome",
"instruction": "Find help page about buying tickets.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://seatgeek.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"is_expected_url_pattern_match",
"result": {
"type": "active_tab_info",
"goto_prefix": "https://www."
},
"expected":{
"type": "rule",
"rules":{
"expected": ["Buying-Tickets"]
}
}
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "is_expected_url_pattern_match",
"result": {
"type": "active_tab_info",
"goto_prefix": "https://www."
},
"expected": {
"type": "rule",
"rules": {
"expected": [
"Buying-Tickets"
]
}
}
},
"proxy": true
}

View File

@@ -65,5 +65,6 @@
"url": "https://www.apple.com/iphone/compare/?modelList=iphone-15-pro-max,iphone-15-pro,iphone-13-pro-max"
}
}
}
}
},
"proxy": true
}

View File

@@ -1,74 +1,82 @@
{
"id": "f79439ad-3ee8-4f99-a518-0eb60e5652b0",
"snapshot": "chrome",
"instruction": "Search for a one way flight from Dublin to Vienna on 10th next month for 2 adults.",
"source": "test_task_2",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.ryanair.com/gb/en"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"id": "f79439ad-3ee8-4f99-a518-0eb60e5652b0",
"snapshot": "chrome",
"instruction": "Search for a one way flight from Dublin to Vienna on 10th next month for 2 adults.",
"source": "test_task_2",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.ryanair.com/gb/en"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "check_direct_json_object",
"result": {
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys": [
"originIata",
"destinationIata",
"tpAdults",
"tpTeens",
"tpChildren",
"tpStartDate",
"isReturn"
],
"replace": {
"tpStartDate": "time"
}
},
"expected": {
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "10th next month"
},
"expected": {
"originIata": "DUB",
"destinationIata": "VIE",
"tpAdults": "2",
"tpTeens": "0",
"tpChildren": "0",
"time": "{Year}-{Month0D}-{DayD}",
"isReturn": "false"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"check_direct_json_object",
"result": {
"type": "active_tab_url_parse",
"goto_prefix": "https://www.",
"parse_keys":["originIata", "destinationIata", "tpAdults", "tpTeens", "tpChildren", "tpStartDate", "isReturn"],
"replace":{
"tpStartDate": "time"
}
},
"expected":{
"type": "rule_relativeTime",
"rules":{
"relativeTime": {
"from": "10th next month"
},
"expected": {
"originIata": "DUB",
"destinationIata": "VIE",
"tpAdults": "2",
"tpTeens": "0",
"tpChildren": "0",
"time": "{Year}-{Month0D}-{DayD}",
"isReturn":"false"
}
}
}
}
}
},
"proxy": true
}

View File

@@ -1,76 +1,76 @@
{
"id": "fc6d8143-9452-4171-9459-7f515143419a",
"snapshot": "chrome",
"instruction": "Find the status of tomorrow flights from New York airports to Columbus in Ohio.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
"id": "fc6d8143-9452-4171-9459-7f515143419a",
"snapshot": "chrome",
"instruction": "Find the status of tomorrow flights from New York airports to Columbus in Ohio.",
"source": "test_task_0",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.delta.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func": "check_direct_json_object",
"result": {
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "class",
"class_singleObject": {
"search-date": "time"
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.delta.com/"
]
}
},
{
"type": "activate_window",
"parameters": {
"window_name": "Google Chrome"
"class_multiObject": {
"search-segment-cities__city": {
"0": "start",
"1": "end"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"chrome"
],
"evaluator": {
"func":"check_direct_json_object",
"result": {
"type": "active_tab_html_parse",
"goto_prefix": "https://www.",
"category": "class",
"class_singleObject":{
"search-date": "time"
},
"class_multiObject":{
"search-segment-cities__city": {
"0": "start",
"1": "end"
}
}
},
"expected": {
"type": "rule_relativeTime",
"rules": {
"relativeTime": {
"from": "tomorrow"
},
"expected":{
"type": "rule_relativeTime",
"rules":{
"relativeTime": {
"from": "tomorrow"
},
"expected": {
"start": "NYC",
"end": "CMH",
"time": "{DoW}, {Month} {DayD}, {Year}"
}
}
"expected": {
"start": "NYC",
"end": "CMH",
"time": "{DoW}, {Month} {DayD}, {Year}"
}
}
}
}
},
"proxy": true
}

View File

@@ -31,5 +31,6 @@
],
"evaluator": {
"func": "infeasible"
}
},
"proxy": false
}

View File

@@ -91,5 +91,6 @@
"path": "/home/user/Desktop/palette_computer.png",
"dest": "palette_computer.png"
}
}
},
"proxy": false
}

View File

@@ -95,5 +95,6 @@
"path": "/home/user/Desktop/dog_without_background.png",
"dest": "dog_without_background.png"
}
}
},
"proxy": false
}

View File

@@ -38,5 +38,6 @@
],
"evaluator": {
"func": "infeasible"
}
},
"proxy": false
}

View File

@@ -22,5 +22,6 @@
],
"evaluator": {
"func": "infeasible"
}
},
"proxy": false
}

View File

@@ -91,5 +91,6 @@
"path": "/home/user/Desktop/edited_colorful.png",
"dest": "edited_colorful.png"
}
}
},
"proxy": false
}

View File

@@ -31,5 +31,6 @@
],
"evaluator": {
"func": "infeasible"
}
},
"proxy": false
}

View File

@@ -3,13 +3,13 @@
"snapshot": "gimp",
"instruction": "Could you help me download the logo of the University of Hong Kong in \".png\" format within GIMP?",
"source": "",
"config": [
],
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"gimp"
],
"evaluator": {
"func": "infeasible"
}
}
},
"proxy": false
}

View File

@@ -22,5 +22,6 @@
],
"evaluator": {
"func": "infeasible"
}
},
"proxy": false
}

View File

@@ -91,5 +91,6 @@
"path": "/home/user/Desktop/berry_mirror.png",
"dest": "berry_mirror.png"
}
}
},
"proxy": false
}

View File

@@ -95,5 +95,6 @@
"path": "/home/user/Desktop/green_background_with_object.png",
"dest": "green_background_with_object.png"
}
}
},
"proxy": false
}

View File

@@ -50,5 +50,6 @@
"file_name": "gimprc",
"dest": "gimprc"
}
}
},
"proxy": false
}

View File

@@ -41,5 +41,6 @@
"path": "/home/user/Desktop/export.jpg",
"dest": "export.jpg"
}
}
},
"proxy": false
}

View File

@@ -91,5 +91,6 @@
"path": "/home/user/Desktop/edited_darker.png",
"dest": "edited_darker.png"
}
}
},
"proxy": false
}

View File

@@ -1,54 +1,55 @@
{
"id": "7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
"snapshot": "gimp",
"instruction": "Set the minimum number of undo steps to 100.",
"source": "https://www.youtube.com/watch?v=G_PjQAy0iiU",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"gimp"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"gimp"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"python3",
"-c",
"import pyautogui; pyautogui.hotkey([\"ctrl\", \"q\"]);"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
}
],
"func": "check_config_status",
"expected": {
"type": "rule",
"rules": {
"type:": "key-value",
"key": "undo-levels",
"value": "100"
}
},
"result": {
"type": "gimp_config_file",
"file_name": "gimprc",
"dest": "gimprc"
}
"id": "7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
"snapshot": "gimp",
"instruction": "Set the minimum number of undo steps to 100.",
"source": "https://www.youtube.com/watch?v=G_PjQAy0iiU",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"gimp"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"gimp"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"python3",
"-c",
"import pyautogui; pyautogui.hotkey([\"ctrl\", \"q\"]);"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
}
],
"func": "check_config_status",
"expected": {
"type": "rule",
"rules": {
"type": "key-value",
"key": "undo-levels",
"value": "100"
}
},
"result": {
"type": "gimp_config_file",
"file_name": "gimprc",
"dest": "gimprc"
}
},
"proxy": false
}

View File

@@ -31,5 +31,6 @@
],
"evaluator": {
"func": "infeasible"
}
},
"proxy": false
}

View File

@@ -65,5 +65,6 @@
"file_name": "action-history",
"dest": "action-history"
}
}
},
"proxy": false
}

View File

@@ -62,5 +62,6 @@
"file_name": "gimprc",
"dest": "gimprc"
}
}
},
"proxy": false
}

View File

@@ -113,5 +113,6 @@
"dest": "resized.png"
}
]
}
},
"proxy": false
}

View File

@@ -1,54 +1,55 @@
{
"id": "d52d6308-ec58-42b7-a2c9-de80e4837b2b",
"snapshot": "gimp",
"instruction": "Could you help me remove the dock on the left side of the screen?",
"source": "https://superuser.com/questions/1447106/how-to-get-rid-of-the-gimp-tool-options-box",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"gimp"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"gimp"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"python3",
"-c",
"import pyautogui; pyautogui.hotkey([\"ctrl\", \"q\"]);"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
}
],
"func": "check_config_status",
"expected": {
"type": "rule",
"rules": {
"type:": "key-value",
"key": "hide-docks",
"value": "yes"
}
},
"result": {
"type": "gimp_config_file",
"file_name": "sessionrc",
"dest": "sessionrc"
}
"id": "d52d6308-ec58-42b7-a2c9-de80e4837b2b",
"snapshot": "gimp",
"instruction": "Could you help me remove the dock on the left side of the screen?",
"source": "https://superuser.com/questions/1447106/how-to-get-rid-of-the-gimp-tool-options-box",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"gimp"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"gimp"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"python3",
"-c",
"import pyautogui; pyautogui.hotkey([\"ctrl\", \"q\"]);"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
}
],
"func": "check_config_status",
"expected": {
"type": "rule",
"rules": {
"type:": "key-value",
"key": "hide-docks",
"value": "yes"
}
},
"result": {
"type": "gimp_config_file",
"file_name": "sessionrc",
"dest": "sessionrc"
}
},
"proxy": false
}

View File

@@ -31,5 +31,6 @@
],
"evaluator": {
"func": "infeasible"
}
},
"proxy": false
}

View File

@@ -3,13 +3,13 @@
"snapshot": "gimp",
"instruction": "Could you tone down the brightness of my photo at desktop?",
"source": "https://www.quora.com/How-do-I-edit-a-photo-in-GIMP",
"config": [
],
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"gimp"
],
"evaluator": {
"func": "infeasible"
}
}
},
"proxy": false
}

View File

@@ -86,5 +86,6 @@
"path": "/home/user/Desktop/leftside_textbox.png",
"dest": "leftside_textbox.png"
}
}
},
"proxy": false
}

View File

@@ -90,5 +90,6 @@
"path": "/home/user/Desktop/Triangle_In_The_Middle.png",
"dest": "Triangle_In_The_Middle.png"
}
}
},
"proxy": false
}

View File

@@ -91,5 +91,6 @@
"path": "/home/user/Desktop/berries_contrast.png",
"dest": "berries_contrast.png"
}
}
},
"proxy": false
}

View File

@@ -3,13 +3,13 @@
"snapshot": "gimp",
"instruction": "Blue is my favorite color, so could you help me change the color theme of GIMP to \"Blue\"?",
"source": "",
"config": [
],
"config": [],
"trajectory": "trajectories/",
"related_apps": [
"gimp"
],
"evaluator": {
"func": "infeasible"
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -87,5 +87,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -83,5 +83,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -86,5 +86,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -105,5 +105,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -91,5 +91,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -82,5 +82,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -84,5 +84,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -84,5 +84,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -84,5 +84,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -86,5 +86,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -112,5 +112,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -3,13 +3,13 @@
"snapshot": "libreoffice_calc",
"instruction": "Make sparkline chart line by line",
"source": "https://www.youtube.com/shorts/L3Z-F1QTQFY",
"config": [
],
"config": [],
"trajectory": "trajectories/2bd59342-0664-4ccb-ba87-79379096cc08",
"related_apps": [
"libreoffice_calc"
],
"evaluator": {
"func": "infeasible"
}
}
},
"proxy": false
}

View File

@@ -107,5 +107,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -87,5 +87,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -79,5 +79,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -86,5 +86,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -38,5 +38,6 @@
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_calc/3aaa4e37-dc91-482e-99af-132a612d40f3/Export_Calc_to_CSV.csv",
"dest": "Export_Calc_to_CSV_gold.csv"
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -83,5 +83,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

View File

@@ -78,5 +78,6 @@
}
]
}
}
},
"proxy": false
}

Some files were not shown because too many files have changed in this diff Show More