diff --git a/.gitignore b/.gitignore
index 22367f9..df58997 100644
--- a/.gitignore
+++ b/.gitignore
@@ -173,6 +173,8 @@ branch_flag
branch-config
*.syncthing.*.tmp
cache
+version.folder
+at_processing
test.xlsx
test2.xlsx
diff --git a/README.md b/README.md
index 71b5302..8eb867f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
-# DesktopEnv: An Environment towards Human-like Computer Task Mastery
+# OSWorld: Open-Ended Tasks in Real Computer Environments
+
+
+
SLOGAN
@@ -8,7 +11,7 @@
Paper
-
+![Overview]()
## Updates
- 2024-03-01: We released our [paper](), [environment code](), [dataset](), and [project page](). Check it out!
diff --git a/desktop_env/assets/icon.jpg b/desktop_env/assets/icon.jpg
new file mode 100644
index 0000000..1879c95
Binary files /dev/null and b/desktop_env/assets/icon.jpg differ
diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py
index 0304827..13a5385 100644
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -82,34 +82,28 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any:
page.wait_for_load_state('load')
action = info_dict.get('action', 'inner_text')
if action == "inner_text":
- ele = page.locator(info_dict['selector'])
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == "attribute":
- ele = page.locator(info_dict['selector'])
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.get_attribute(info_dict['attribute']))
elif action == 'click_and_inner_text':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
- link = page.locator(sel)
- expect(link).to_be_visible()
+ link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
- ele = page.locator(sel)
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(sel, state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == 'click_and_attribute':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
- link = page.locator(sel)
- expect(link).to_be_visible()
+ link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
- ele = page.locator(sel)
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(sel, state='attached')
infos.append(ele.get_attribute(info_dict['attribute']))
else:
raise NotImplementedError(f'The action {action} is not supported yet.')
diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py
index 39394d0..2cb81ab 100644
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -86,7 +86,6 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
if not config.get("multi", False):
paths: List[str] = [config["path"]]
dests: List[str] = [config["dest"]]
- print(config)
if "time_suffix" in config.keys() and config["time_suffix"]:
if "time_format" in config.keys():
time_format = config["time_format"]
@@ -96,19 +95,14 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
else:
paths: List[str] = config["path"]
dests: List[str] = config["dest"]
- print(paths)
- print(dests)
+
cache_paths: List[str] = []
gives: Set[int] = set(config.get("gives", [0]))
for i, (p, d) in enumerate(zip(paths, dests)):
- print("env cache_dir: ")
- print(env.cache_dir)
_path = os.path.join(env.cache_dir, d)
- print("_path: ")
- print(_path)
file = env.controller.get_file(p)
if file is None:
#return None
@@ -121,9 +115,6 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
cache_paths.append(_path)
with open(_path, "wb") as f:
f.write(file)
- # debug
- print("cache_paths")
- print(cache_paths)
return cache_paths[0] if len(cache_paths)==1 else cache_paths
diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index 6fdcf8d..c3f7f85 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -68,7 +68,7 @@ from .general import (
compare_time_in_speedtest_results,
is_included_all_json_objects,
is_gold_text_included_in_pdf,
- check_csv_line_number,
+ check_line_number,
file_contains,
compare_terminal_and_txt,
fuzzy_place_math,
diff --git a/desktop_env/evaluators/metrics/calc.py b/desktop_env/evaluators/metrics/calc.py
index 701be27..0ff0744 100644
--- a/desktop_env/evaluators/metrics/calc.py
+++ b/desktop_env/evaluators/metrics/calc.py
@@ -1,8 +1,13 @@
+import logging
+from typing import List
+
import openpyxl
-def compare_conference_city_in_order( actual_city_list_path, expected_city):
+logger = logging.getLogger("desktopenv.metrics.calc")
+
+
+def compare_conference_city_in_order(actual_city_list_path, expected_city):
expected_city_list = expected_city["expected"]
- print(f"Reading csv file from {actual_city_list_path}")
wb = openpyxl.load_workbook(actual_city_list_path)
sheet = wb.active
actual_city_list = []
@@ -12,16 +17,25 @@ def compare_conference_city_in_order( actual_city_list_path, expected_city):
# expected_city is the city that we want to compare with the actual city list
# must in order index
# debug
- print("expected_city_list:")
- print(expected_city_list)
- print("actual_city_list_path:")
- print(actual_city_list)
- wrong_list = []
try:
for i in range(len(actual_city_list)):
- if expected_city_list[i] not in actual_city_list[i]:
- wrong_list.append(i)
- print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ if isinstance(expected_city_list[i], str):
+ if expected_city_list[i] not in actual_city_list[i]:
+ logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ return 0.
+
+
+ elif isinstance(expected_city_list[i], List):
+ if not any(possible_str in actual_city_list[i] for possible_str in expected_city_list[i]):
+ logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ return 0.
+
+ else:
+ raise TypeError("Expected city should be a string or a list of strings")
+
except:
- return False
- return True if len(wrong_list) == 0 else False
\ No newline at end of file
+ return 0.
+
+ return 1.
diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py
index 5dc2a48..3054bee 100644
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -65,7 +65,16 @@ def is_expected_installed_extensions(installed_extensions, expected) -> float:
print("installed_extensions: ")
print(installed_extensions)
expected_extensions = expected["expected"]
- return 1 if expected_extensions == installed_extensions else 0. # must equal, no additional extensions allowed
+
+ # whether the expected extensions are installed
+ set_expected_extensions = set(expected_extensions)
+ set_installed_extensions = set(installed_extensions)
+
+ if set_expected_extensions.issubset(set_installed_extensions):
+ return 1.
+ else:
+ return 0.
+
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""
@@ -210,7 +219,10 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
"""
Compare two archives. Note that the files in the archives should be of the same type.
"""
- if not pred_path: return 0.
+ file_path = kwargs.pop('file_path', '')
+
+ if not pred_path:
+ return 0.
pred_folder = os.path.splitext(pred_path)[0] + '_pred'
gold_folder = os.path.splitext(gold_path)[0] + '_gold'
@@ -218,13 +230,16 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
shutil.rmtree(pred_folder, ignore_errors=True)
os.makedirs(pred_folder)
shutil.unpack_archive(pred_path, pred_folder)
+
if not os.path.exists(gold_folder): # use cache if exists
os.makedirs(gold_folder)
shutil.unpack_archive(gold_path, gold_folder)
- pred_files = sorted(os.listdir(pred_folder))
- gold_files = sorted(os.listdir(gold_folder))
- if pred_files != gold_files: return 0.
+ pred_files = sorted(os.listdir(os.path.join(pred_folder, file_path)))
+ gold_files = sorted(os.listdir(os.path.join(gold_folder, file_path)))
+
+ if pred_files != gold_files:
+ return 0.
def get_compare_function():
file_type = kwargs.pop('file_type', 'text')
@@ -260,8 +275,8 @@ def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
score = 0
compare_function = get_compare_function()
for f1, f2 in zip(pred_files, gold_files):
- fp1 = os.path.join(pred_folder, f1)
- fp2 = os.path.join(gold_folder, f2)
+ fp1 = os.path.join(pred_folder, file_path, f1)
+ fp2 = os.path.join(gold_folder, file_path, f2)
score += compare_function(fp1, fp2, **kwargs)
return score / len(pred_files)
@@ -381,3 +396,16 @@ def is_added_to_steam_cart(active_tab_info, rule):
return 0.
return 1.
+
+
+if __name__ == '__main__':
+ result = compare_archive(
+ r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\0e5303d4-8820-42f6-b18d-daf7e633de21\lecture_slides.zip",
+ r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\0e5303d4-8820-42f6-b18d-daf7e633de21\gold_lecture_slides.zip",
+
+ **{
+ "file_path": "lecture_slides",
+ "file_type": "pdf"
+ })
+
+ print(result)
diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py
index 7ff7bf2..a764810 100644
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -1,4 +1,5 @@
import csv
+import os
import datetime
import difflib
import functools
@@ -91,23 +92,24 @@ def fuzzy_match(result, rules) -> float:
def fuzzy_place_math(result_file_path, rules) -> float:
+ if result_file_path is None:
+ return 0.
expect = rules["expected"] # a list of possible answers
# read list.docx, and get all texts out, overlook blank lines, remove blanks before and after each line
doc = Document(result_file_path)
words_list = []
for para in doc.paragraphs:
words_list.extend(para.text.split())
- # Print out the list of extracted words
- print("Your Answers: ")
- print(words_list)
fuzzy_score_list = []
for word in words_list:
max_score = 0
for ans in expect:
- score = fuzz.ratio(word, ans)
+ score = fuzz.ratio(word, ans)/100
max_score = max(max_score, score)
fuzzy_score_list.append(max_score)
- return sum(fuzzy_score_list) / len(fuzzy_score_list)
+ if len(fuzzy_score_list) != 3:
+ return 0.
+ return sum(fuzzy_score_list) / 3
def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
"""
@@ -304,29 +306,40 @@ def check_direct_json_object(result, rules) -> float:
result = result.replace("'", '"')
# load json object
result = json.loads(result)
- print("result: ")
- print(result)
- print("expected: ")
- print(rules["expected"])
if result is None:
return 0.
+ try:
+ expect_in_result = rules.get("expect_in_result", False)
+ if not expect_in_result:
+ expected_json = rules["expected"]
+ for key in expected_json.keys():
+ expected_value = expected_json.get(key)
+ if expected_value != result.get(key):
+ return 0.
+ return 1.0
+ else:
+ expected_json = rules["expected"]
- expect_in_result = rules.get("expect_in_result", False)
- if not expect_in_result:
- expected_json = rules["expected"]
- for key in expected_json.keys():
- expected_value = expected_json.get(key)
- if expected_value != result.get(key):
- return 0.
- return 1.0
- else:
- expected_json = rules["expected"]
- for key in expected_json.keys():
- expected_value = expected_json.get(key)
- if expected_value not in result.get(key):
- return 0.
- return 1.0
-
+ for key in expected_json.keys():
+ if isinstance(expected_json.get(key), List):
+ flag = 0
+ expected_value_list = expected_json.get(key)
+ for each_expected_value in expected_value_list:
+ if each_expected_value in result.get(key):
+ flag = 1
+ break
+ if flag == 0:
+ return 0.
+ elif isinstance(expected_json.get(key), str):
+ if expected_json.get(key) not in result.get(key):
+ return 0.
+ else:
+ logger.debug("check_direct_json_object: expected value type not supported")
+ return 0.
+ return 1.0
+ except:
+ logger.debug("check_direct_json_object: result is not a valid json object")
+ return 0.
def compare_time_in_speedtest_results(speedtest_result_path, time_diff):
if not speedtest_result_path:
@@ -397,24 +410,37 @@ def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path):
def file_contains(file_path, config):
# file_path ends with .txt
if not file_path:
- return 1
- with open(file_path, 'r') as f:
- file_text = f.read()
- for text in config["expected"]:
- if text not in file_text:
- return 0
- return 1
+ return 0.
+ try:
+ with open(file_path, 'r') as f:
+ file_text = f.read()
+ for text in config["expected"]:
+ if text not in file_text:
+ logger.debug(f"file_contains: {text} not found in {file_path}")
+ return 0.
+ except:
+ logger.debug("file_contains: file not found or not readable")
+ return 0.
+ return 1.
-def check_csv_line_number(file_path, line_number):
- # check file_path suffix
- if not file_path.endswith('.csv'):
- return 0
- # check line number
- with open(file_path, 'r') as f:
- reader = csv.reader(f)
- line_count = sum(1 for row in reader)
- return 1 if line_count == int(line_number["expected"]) else 0
+def check_line_number(file_path, line_number):
+ # check if file_path exists
+ if file_path is None or not os.path.isfile(file_path):
+ return 0.
+ timeRegex = "([01]\d|2[0-3]):[0-5]\d:[0-5]\d"
+ # count the lines in this txt file that contain a match for timeRegex; that count must equal line_number["expected"]
+ try:
+ with open(file_path, 'r') as f:
+ line_count = 0
+ for line in f:
+ if re.search(timeRegex, line):
+ line_count += 1
+ # if line_count equals to line_number["expected"], return 1, else return 0
+ return 1 if line_count == int(line_number["expected"]) else 0
+ except:
+ logger.debug("check_line_number: file not found or not readable")
+ return 0.
def compare_terminal_and_txt(txt_file_path, terminal_output):
diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py
index 4d9763b..f691724 100644
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -276,12 +276,12 @@ def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = N
# only text shown on current screen is available
# attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount)
text: str = text_obj.getText(0, text_obj.characterCount)
- if flag=="thunderbird":
- # appeard in thunderbird (uFFFC), "Object Replacement Character" in
- # Unicode, "used as placeholder in text for an otherwise
- # unspecified object; uFFFD is another "Replacement Character",
- # just in case
- text = text.replace("\ufffc", "").replace("\ufffd", "")
+ #if flag=="thunderbird":
+ # appeared in thunderbird (uFFFC) (not only in thunderbird), "Object
+ # Replacement Character" in Unicode, "used as placeholder in text for
+ # an otherwise unspecified object"; uFFFD is another "Replacement
+ # Character", just in case
+ text = text.replace("\ufffc", "").replace("\ufffd", "")
# }}} Text #
# Selection {{{ #
diff --git a/evaluation_examples/examples/chrome/9656a811-9b5b-4ddf-99c7-5117bcef0626.json b/evaluation_examples/examples/chrome/9656a811-9b5b-4ddf-99c7-5117bcef0626.json
index a911041..70b5732 100644
--- a/evaluation_examples/examples/chrome/9656a811-9b5b-4ddf-99c7-5117bcef0626.json
+++ b/evaluation_examples/examples/chrome/9656a811-9b5b-4ddf-99c7-5117bcef0626.json
@@ -29,6 +29,15 @@
"chrome"
],
"evaluator": {
+ "postconfig": [
+ {
+ "type": "execute",
+ "parameters": {
+ "command": "pkill chrome",
+ "shell": "true"
+ }
+ }
+ ],
"func": "exact_match",
"result": {
"type": "enable_enhanced_safety_browsing"
diff --git a/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json b/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json
index abc1284..f8ecc50 100644
--- a/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json
+++ b/evaluation_examples/examples/multi_apps/0e5303d4-8820-42f6-b18d-daf7e633de21.json
@@ -90,6 +90,10 @@
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Ej2iHG8p-QJe7FZQpPIIS82BHOlFAUQM&export=download&authuser=0&confirm=t",
"dest": "gold_lecture_slides.zip"
+ },
+ "options": {
+ "file_path": "lecture_slides",
+ "file_type": "pdf"
}
}
}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/2373b66a-092d-44cb-bfd7-82e86e7a3b4d.json b/evaluation_examples/examples/multi_apps/2373b66a-092d-44cb-bfd7-82e86e7a3b4d.json
index 8cf2e38..4e76c1d 100644
--- a/evaluation_examples/examples/multi_apps/2373b66a-092d-44cb-bfd7-82e86e7a3b4d.json
+++ b/evaluation_examples/examples/multi_apps/2373b66a-092d-44cb-bfd7-82e86e7a3b4d.json
@@ -1,7 +1,7 @@
{
"id": "2373b66a-092d-44cb-bfd7-82e86e7a3b4d",
"snapshot": "multiapps",
- "instruction": "I want to understand the resource usage of my Ubuntu system under normal workloads. Please use the `sar` command in the `sysstat` toolkit to monitor the system CPU usage, evaluate the status once every second for 30 seconds, output the results to \"System_Resources_Report.txt\" under Desktop, and convert the txt to csv file with the same name.",
+ "instruction": "I want to understand the resource usage of my Ubuntu system under normal workloads. Please use the `sar` command in the `sysstat` toolkit to monitor system activity, evaluate the status once every second for 30 seconds, output the results to \"System_Resources_Report.txt\" under Desktop.",
"source": "author",
"config": [
{
@@ -17,7 +17,7 @@
"os", "calc"
],
"evaluator": {
- "func": ["file_contains", "check_csv_line_number"],
+ "func": ["file_contains", "check_line_number"],
"result":
[
{
@@ -27,8 +27,8 @@
},
{
"type": "vm_file",
- "path": "/home/user/Desktop/System_Resources_Report.csv",
- "dest": "System_Resources_Report.csv"
+ "path": "/home/user/Desktop/System_Resources_Report.txt",
+ "dest": "System_Resources_Report.txt"
}
],
"expected":
@@ -36,13 +36,13 @@
{
"type": "rule",
"rules" :{
- "expected": ["tps", "rtps", "wtps", "dtps", "bread/s", "bwrtn/s", "bdscd/s", "Average", "Linux"]
+ "expected": ["CPU", "%user","%nice","%system", "%iowait", "%steal", "%idle"]
}
},
{
"type": "rule",
"rules": {
- "expected": "33"
+ "expected": "31"
}
}]
}
diff --git a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json
index be80232..34d0353 100644
--- a/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json
+++ b/evaluation_examples/examples/multi_apps/415ef462-bed3-493a-ac36-ca8c6d23bf1b.json
@@ -57,7 +57,7 @@
{
"type": "launch",
"parameters": {
- "command": ["nautilus"]
+ "command": ["nautilus", "/home/user/Documents/Finance"]
}
}
],
diff --git a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
index b778489..2cc3cdb 100644
--- a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
+++ b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
@@ -25,6 +25,12 @@
"path": "/home/user/Desktop/Robotic_Workshop_Infographics.pptx"
}
},
+ {
+ "type": "sleep",
+ "parameters": {
+ "seconds": 3
+ }
+ },
{
"type": "launch",
"parameters": {
diff --git a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
index 9d754dd..eb77ab5 100644
--- a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
+++ b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
@@ -23,15 +23,6 @@
]
}
},
- {
- "type": "chrome_open_tabs",
- "parameters": {
- "urls_to_open": [
- "https://arxiv.org/abs/2005.14165",
- "https://wallhaven.cc/"
- ]
- }
- },
{
"type": "download",
"parameters": {
@@ -105,7 +96,7 @@
"147",
"372",
"Deep learning",
- "https://creativecoding.soe.ucsc.edu/courses/cs523/slides/week3/DeepLearning_LeCun.pdf"
+ "https://hal.science/hal-04206682/document"
]
},
"options": {
diff --git a/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json b/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
index 4792da5..414aab4 100644
--- a/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
+++ b/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
@@ -51,7 +51,7 @@
"command": [
"python",
"-c",
- "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
+ "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);"
]
}
}
@@ -60,7 +60,7 @@
"expected": {
"type": "rule",
"rules":{
- "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing","Montreal","San Diego","Lille","Montreal","San Juan","New York","Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm","Montréal","New Orleans","Long Beach","Vancouver"]
+ "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"]
}
},
"result": {
diff --git a/evaluation_examples/examples/multi_apps/7ff48d5b-2df2-49da-b500-a5150ffc7f18.json b/evaluation_examples/examples/multi_apps/7ff48d5b-2df2-49da-b500-a5150ffc7f18.json
index 07d4c0d..4ce5836 100644
--- a/evaluation_examples/examples/multi_apps/7ff48d5b-2df2-49da-b500-a5150ffc7f18.json
+++ b/evaluation_examples/examples/multi_apps/7ff48d5b-2df2-49da-b500-a5150ffc7f18.json
@@ -33,6 +33,24 @@
"chrome", "os", "writer"
],
"evaluator": {
+ "postconfig":[
+ {
+ "type": "activate_window",
+ "parameters": {
+ "window_name": "AllLocations.docx - LibreOffice Writer"
+ }
+ },
+ {
+ "type": "execute",
+ "parameters": {
+ "command": [
+ "python",
+ "-c",
+ "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
+ ]
+ }
+ }
+ ],
"func": "fuzzy_place_math",
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
index 762fcc3..e564020 100644
--- a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
+++ b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
@@ -1,7 +1,7 @@
{
"id": "81c425f5-78f3-4771-afd6-3d2973825947",
"snapshot": "libreoffice_calc",
- "instruction": "Can you assist me in transferring the data from LibreOffice Calc in this file to a LibreOffice Writer table while preserving the original format as in calc file? Save the document as \"price.docx\" on the desktop.",
+ "instruction": "Can you assist me in transferring the data from LibreOffice Calc in the current sheet to a LibreOffice Writer table while preserving the original format as in calc file? Save the document as \"price.docx\" on the desktop.",
"source": "authors",
"config": [
{
diff --git a/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json b/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
index 2d9f175..f1cb660 100644
--- a/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
+++ b/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
@@ -56,7 +56,7 @@
"expected": {
"type": "rule",
"rules":{
- "expected": ["Zoom Chrome Extension", "Speechify Text to Speech Voice Reader", "React Developer Tools", "Momentum", "Google Translate", "Web Store", "Chromium PDF Viewer", "Google Hangouts"]
+ "expected": ["Zoom Chrome Extension", "Speechify Text to Speech Voice Reader", "React Developer Tools", "Momentum", "Google Translate"]
}
},
"result": {
diff --git a/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json b/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
index 58c7ca7..89f5a21 100644
--- a/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
+++ b/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
@@ -30,7 +30,7 @@
],
"trajectory": "trajectories/",
"related_apps": [
- "os", "vlc"
+ "os", "vlc", "ubuntu_media_player"
],
"evaluator": {
"postconfig":[
diff --git a/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json b/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json
index 9628868..95a068e 100644
--- a/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json
+++ b/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json
@@ -20,7 +20,7 @@
"parameters": {
"files": [
{
- "url": "https://drive.usercontent.google.com/download?id=1iquZNI4ktx2915srQ1MMlhcwCuXnmq5T&export=download&authuser=0&confirm=t&uuid=e0bfd86f-a0f1-4a9e-8344-0f645eb379dd&at=APZUnTWQnCd7bNtf8ZN4KfkQUaJ9:1709972645590",
+ "url": "https://drive.usercontent.google.com/download?id=1XLBhnopqQrTw3bxRWOyfTCslfawFyOaZ&export=download&authuser=0&confirm=t&uuid=f41a09c1-8319-4a50-b8c3-4fc58ba51c00&at=APZUnTV3uiv7wKiSvoCekJG_aVo3:1710051227101",
"path": "/home/user/Pictures/picture2.jpg"
}
@@ -68,9 +68,9 @@
"type": "rule",
"rules":{
"expected":{
- "ec076282f61ba74642e94b5a6a1250c6988204d59d9b02936606b6b8ef1e4433": "Kilimanjaro",
- "999957c8de835bfa420d198270e7a6b079ee20ff53a3f214491e8134768a7c0b": "Himalayas",
- "79f45d40d8413d4e81f1b9734ea39e58622cafd79e12bab32959643fc245147c": "Hua"
+ "ec076282f61ba74642e94b5a6a1250c6988204d59d9b02936606b6b8ef1e4433": ["Kilimanjaro"],
+ "6ed4239ecc2be3ec15ad65a78c5c823b9004d640b8cc83a6a7af5930f354de91": ["Himalayas", "Everest", "Sagarmatha"],
+ "79f45d40d8413d4e81f1b9734ea39e58622cafd79e12bab32959643fc245147c": ["Hua", "hua"]
},
"expect_in_result": true
}
diff --git a/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json b/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json
index c751d7b..2ec27c1 100644
--- a/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json
+++ b/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json
@@ -1,7 +1,7 @@
{
"id": "da922383-bfa4-4cd3-bbad-6bebab3d7742",
"snapshot": "multiapps",
- "instruction": "I browsed a lot of interesting blog articles today. I hope to store these articles in my local designated folder just like zotero stores papers. Please download the blogs you are opening now in pdf format to /home/user/Documents/Blogs.",
+ "instruction": "I browsed a lot of interesting blog articles today. I hope to store these articles in my local designated folder just like zotero stores papers. Please download the blogs opening now in pdf format and save them under their titles to /home/user/Documents/Blogs.",
"source": "authors",
"config": [
{
diff --git a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py
index 7ab439f..7c45ec3 100644
--- a/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py
+++ b/mm_agents/accessibility_tree_wrap/heuristic_retrieve.py
@@ -25,35 +25,42 @@ def find_leaf_nodes(xlm_file_str):
return leaf_nodes
-def filter_nodes(nodes):
+def filter_nodes(nodes, platform="ubuntu"):
filtered_nodes = []
for node in nodes:
- if not node.get('{uri:deskat:state.at-spi.gnome.org}visible', None) == 'true':
- # Not visible
- continue
- # Check if the node is a 'panel'
- if node.tag == 'panel':
- # Check if the 'panel' represents an interactive element
- # or if it has certain attributes that are of interest.
- # Add your conditions here...
- if node.get('{uri:deskat:state.at-spi.gnome.org}focusable', 'false') == 'true':
- filtered_nodes.append(node)
- elif node.tag == 'text':
- continue
- elif node.get("name") == "" and node.text is None:
- continue
- else:
- coords = tuple(
- map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}screencoord').strip('()').split(', ')))
- if coords[0] < 0 or coords[1] < 0:
- continue
- size = tuple(
- map(int, node.attrib.get('{uri:deskat:component.at-spi.gnome.org}size').strip('()').split(', ')))
- if size[0] <= 0 or size[1] <= 0:
- continue
- # Node is not a 'panel', add to the list.
- filtered_nodes.append(node)
+ if node.tag.startswith("document")\
+ or node.tag.endswith("item")\
+ or node.tag.endswith("button")\
+ or node.tag.endswith("heading")\
+ or node.tag.endswith("label")\
+ or node.tag.endswith("bar")\
+ or node.tag.endswith("searchbox")\
+ or node.tag.endswith("textbox")\
+ or node.tag.endswith("link")\
+ or node.tag.endswith("tabelement")\
+ or node.tag.endswith("textfield")\
+ or node.tag.endswith("textarea")\
+ or node.tag in [ "alert", "canvas", "check-box"
+ , "combo-box", "entry", "icon"
+ , "image", "paragraph"
+ , "section", "slider", "static"
+ , "table-cell", "terminal", "text"
+ , "netuiribbontab", "start", "trayclockwclass"
+ , "traydummysearchcontrol", "uiimage", "uiproperty"
+ ]:
+ if ( platform=="ubuntu"\
+ and node.get("{{{:}}}showing".format("uri:deskat:state.at-spi.gnome.org"), "false")=="true"\
+ and node.get("{{{:}}}visible".format("uri:deskat:state.at-spi.gnome.org"), "false")=="true"\
+ or platform=="windows"\
+ and node.get("{{{:}}}visible".format("uri:deskat:state.at-spi.gnome.org"), "false")=="true"\
+ )\
+ and node.get("{{{:}}}enabled".format("uri:deskat:state.at-spi.gnome.org"), "false")=="true"\
+ and (node.get("name", "") != "" or node.text is not None and len(node.text)>0):
+ coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format("uri:deskat:component.at-spi.gnome.org")))
+ sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format("uri:deskat:component.at-spi.gnome.org")))
+ if coordinates[0]>0 and coordinates[1]>0 and sizes[0]>0 and sizes[1]>0:
+ filtered_nodes.append(node)
return filtered_nodes
@@ -134,12 +141,14 @@ def print_nodes_with_indent(nodes, indent=0):
if __name__ == '__main__':
- with open('chrome_desktop_example_1.xml', 'r', encoding='utf-8') as f:
- xml_file_str = f.read()
+ import json
+ with open('2.json', 'r', encoding='utf-8') as f:
+ xml_file_str = json.load(f)["AT"]
filtered_nodes = filter_nodes(find_leaf_nodes(xml_file_str))
print(len(filtered_nodes))
- masks = draw_bounding_boxes(filtered_nodes, 'screenshot.png',
- 'chrome_desktop_example_1_tagged_remove.png', )
+ masks = draw_bounding_boxes( filtered_nodes, '2.png'
+ , '2.a.png'
+ )
# print(masks)
print(len(masks))