Merge branch 'main' of github.com:xlang-ai/OSWorld
This commit is contained in:
@@ -115,6 +115,11 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f
|
|||||||
if match_type == "url":
|
if match_type == "url":
|
||||||
expected_urls = rule['urls']
|
expected_urls = rule['urls']
|
||||||
actual_urls = [tab['url'] for tab in open_tabs]
|
actual_urls = [tab['url'] for tab in open_tabs]
|
||||||
|
if not are_lists_equal(expected_urls, actual_urls, compare_urls):
|
||||||
|
logger.error("list not match")
|
||||||
|
logger.error(expected_urls)
|
||||||
|
logger.error(actual_urls)
|
||||||
|
return 0
|
||||||
return 1 if are_lists_equal(expected_urls, actual_urls, compare_urls) else 0
|
return 1 if are_lists_equal(expected_urls, actual_urls, compare_urls) else 0
|
||||||
else:
|
else:
|
||||||
logger.error(f"Unknown type: {match_type}")
|
logger.error(f"Unknown type: {match_type}")
|
||||||
@@ -343,7 +348,7 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
|
|||||||
return score / len(pred_files)
|
return score / len(pred_files)
|
||||||
|
|
||||||
|
|
||||||
def compare_htmls(html_path1: str, html_path2: str) -> float:
|
def compare_htmls(html_path1: str, html_path2: str, **options) -> float:
|
||||||
"""
|
"""
|
||||||
Compare two HTML files.
|
Compare two HTML files.
|
||||||
"""
|
"""
|
||||||
@@ -351,20 +356,33 @@ def compare_htmls(html_path1: str, html_path2: str) -> float:
|
|||||||
soup1 = BeautifulSoup(inf, 'lxml')
|
soup1 = BeautifulSoup(inf, 'lxml')
|
||||||
with open(html_path2, 'r', encoding='utf-8') as inf:
|
with open(html_path2, 'r', encoding='utf-8') as inf:
|
||||||
soup2 = BeautifulSoup(inf, 'lxml')
|
soup2 = BeautifulSoup(inf, 'lxml')
|
||||||
|
ignore_sdnum = options.get("ignore_sdnum", None)
|
||||||
|
|
||||||
def compare_elements(elem1, elem2):
|
def compare_elements(elem1, elem2):
|
||||||
if not (isinstance(elem1, Tag) and isinstance(elem2, Tag)):
|
if not (isinstance(elem1, Tag) and isinstance(elem2, Tag)):
|
||||||
|
if elem1 != elem2:
|
||||||
|
logger.info("not the same")
|
||||||
return elem1 == elem2
|
return elem1 == elem2
|
||||||
if elem1.name != elem2.name:
|
if elem1.name != elem2.name:
|
||||||
|
logger.info("html name not match")
|
||||||
return False
|
return False
|
||||||
if elem1.text.strip() != elem2.text.strip():
|
if elem1.text.strip() != elem2.text.strip():
|
||||||
|
logger.info("html text not match")
|
||||||
return False
|
return False
|
||||||
if elem1.attrs != elem2.attrs:
|
if elem1.attrs != elem2.attrs:
|
||||||
|
if ignore_sdnum:
|
||||||
|
attrs1 = {k: v for k, v in elem1.attrs.items() if k != 'sdnum'}
|
||||||
|
attrs2 = {k: v for k, v in elem2.attrs.items() if k != 'sdnum'}
|
||||||
|
return attrs1 == attrs2
|
||||||
|
logger.info("html attrs not match")
|
||||||
|
logger.info(f"{elem1.attrs}")
|
||||||
|
logger.info(f"{elem2.attrs}")
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
for elem1, elem2 in zip(soup1.recursiveChildGenerator(), soup2.recursiveChildGenerator()):
|
for elem1, elem2 in zip(soup1.recursiveChildGenerator(), soup2.recursiveChildGenerator()):
|
||||||
if not compare_elements(elem1, elem2):
|
if not compare_elements(elem1, elem2):
|
||||||
|
logger.info("html not match")
|
||||||
return .0
|
return .0
|
||||||
return 1.
|
return 1.
|
||||||
|
|
||||||
|
|||||||
@@ -213,7 +213,6 @@ _accessibility_ns_map = {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def check_accessibility_tree(result: str, rules: List[Dict[str, Any]], osname: str = "ubuntu") -> float:
|
def check_accessibility_tree(result: str, rules: List[Dict[str, Any]], osname: str = "ubuntu") -> float:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
|
|||||||
@@ -163,7 +163,8 @@
|
|||||||
"hua shan mountain.jpg"
|
"hua shan mountain.jpg"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"expect_in_result": true
|
"expect_in_result": true,
|
||||||
|
"result_not_list": true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"id": "da922383-bfa4-4cd3-bbad-6bebab3d7742",
|
"id": "da922383-bfa4-4cd3-bbad-6bebab3d7742",
|
||||||
"snapshot": "multiapps",
|
"snapshot": "multiapps",
|
||||||
"instruction": "I browsed a lot of interesting blog articles today. I hope to store these articles in my local designated folder just like zotero stores papers. Please download the blogs opening now in pdf format and save them in their tile to /home/user/Documents/Blog.",
|
"instruction": "I browsed a lot of interesting blog articles today. I hope to store these articles in my local designated folder just like zotero stores papers. Please download the blogs opening now in pdf format and save them in their title to /home/user/Documents/Blog.",
|
||||||
"source": "authors",
|
"source": "authors",
|
||||||
"config": [
|
"config": [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -89,7 +89,14 @@
|
|||||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/e135df7c-7687-4ac0-a5f0-76b74438b53e/annual-enterprise-survey-2021-financial-year-provisional.html",
|
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/e135df7c-7687-4ac0-a5f0-76b74438b53e/annual-enterprise-survey-2021-financial-year-provisional.html",
|
||||||
"dest": "annual-enterprise-survey-2021-financial-year-provisional_gold.html"
|
"dest": "annual-enterprise-survey-2021-financial-year-provisional_gold.html"
|
||||||
}
|
}
|
||||||
|
],
|
||||||
|
"options": [
|
||||||
|
{},
|
||||||
|
{
|
||||||
|
"ignore_sdnum": true
|
||||||
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|
||||||
"proxy": true
|
"proxy": true
|
||||||
}
|
}
|
||||||
@@ -36,8 +36,8 @@
|
|||||||
],
|
],
|
||||||
"evaluator": {
|
"evaluator": {
|
||||||
"func": [
|
"func": [
|
||||||
"check_structure_sim",
|
"check_structure_sim_resized",
|
||||||
"check_structure_sim"
|
"check_structure_sim_resized"
|
||||||
],
|
],
|
||||||
"result": [
|
"result": [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
"files": [
|
"files": [
|
||||||
{
|
{
|
||||||
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2/file.xls",
|
"url": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/f8cfa149-d1c1-4215-8dac-4a0932bad3c2/file.xls",
|
||||||
"path": "/home/user/cell_search.xlsx"
|
"path": "/home/user/cell_search.xls"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -47,7 +47,7 @@
|
|||||||
{
|
{
|
||||||
"type": "open",
|
"type": "open",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"path": "/home/user/cell_search.xlsx"
|
"path": "/home/user/cell_search.xls"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -65,7 +65,7 @@
|
|||||||
"type": "rule",
|
"type": "rule",
|
||||||
"rules": {
|
"rules": {
|
||||||
"expect": {
|
"expect": {
|
||||||
"pattern": "www\\.google\\.com.*?/search\\?q=Nereida&"
|
"pattern": "https?://(www\\.?)?google\\.com/search\\?q=nereida(&|$)"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user