diff --git a/README.md b/README.md
index 71b5302..8eb867f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
-# DesktopEnv: An Environment towards Human-like Computer Task Mastery
+# OSWorld: Open-Ended Tasks in Real Computer Environments
+
+
+
SLOGAN
@@ -8,7 +11,7 @@
Paper
-
+![Overview]()
## Updates
- 2024-03-01: We released our [paper](), [environment code](), [dataset](), and [project page](). Check it out!
diff --git a/desktop_env/assets/icon.jpg b/desktop_env/assets/icon.jpg
new file mode 100644
index 0000000..1879c95
Binary files /dev/null and b/desktop_env/assets/icon.jpg differ
diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py
index 0304827..13a5385 100644
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -82,34 +82,28 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any:
page.wait_for_load_state('load')
action = info_dict.get('action', 'inner_text')
if action == "inner_text":
- ele = page.locator(info_dict['selector'])
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == "attribute":
- ele = page.locator(info_dict['selector'])
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.get_attribute(info_dict['attribute']))
elif action == 'click_and_inner_text':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
- link = page.locator(sel)
- expect(link).to_be_visible()
+ link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
- ele = page.locator(sel)
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(sel, state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == 'click_and_attribute':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
- link = page.locator(sel)
- expect(link).to_be_visible()
+ link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
- ele = page.locator(sel)
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(sel, state='attached', timeout=10000)  # same 10 s cap as the sibling lookups
infos.append(ele.get_attribute(info_dict['attribute']))
else:
raise NotImplementedError(f'The action {action} is not supported yet.')
diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py
index 6b283a9..2cb81ab 100644
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -96,16 +96,13 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
paths: List[str] = config["path"]
dests: List[str] = config["dest"]
+
cache_paths: List[str] = []
gives: Set[int] = set(config.get("gives", [0]))
for i, (p, d) in enumerate(zip(paths, dests)):
- print("env cache_dir: ")
- print(env.cache_dir)
_path = os.path.join(env.cache_dir, d)
- print("_path: ")
- print(_path)
file = env.controller.get_file(p)
if file is None:
#return None
diff --git a/desktop_env/evaluators/metrics/calc.py b/desktop_env/evaluators/metrics/calc.py
index 701be27..0ff0744 100644
--- a/desktop_env/evaluators/metrics/calc.py
+++ b/desktop_env/evaluators/metrics/calc.py
@@ -1,8 +1,13 @@
+import logging
+from typing import List
+
import openpyxl
-def compare_conference_city_in_order( actual_city_list_path, expected_city):
+logger = logging.getLogger("desktopenv.metrics.calc")
+
+
+def compare_conference_city_in_order(actual_city_list_path, expected_city):
expected_city_list = expected_city["expected"]
- print(f"Reading csv file from {actual_city_list_path}")
wb = openpyxl.load_workbook(actual_city_list_path)
sheet = wb.active
actual_city_list = []
@@ -12,16 +17,25 @@ def compare_conference_city_in_order( actual_city_list_path, expected_city):
# expected_city is the city that we want to compare with the actual city list
# must in order index
# debug
- print("expected_city_list:")
- print(expected_city_list)
- print("actual_city_list_path:")
- print(actual_city_list)
- wrong_list = []
try:
for i in range(len(actual_city_list)):
- if expected_city_list[i] not in actual_city_list[i]:
- wrong_list.append(i)
- print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+            expected = expected_city_list[i]
+            # Normalise the rule entry to a list of accepted spellings: a bare
+            # string is shorthand for exactly one accepted spelling.
+            if isinstance(expected, str):
+                candidates = [expected]
+            elif isinstance(expected, list):
+                candidates = expected
+            else:
+                raise TypeError("Expected city should be a string or a list of strings")
+
+            # Substring match, so surrounding text in the cell is tolerated.
+            if not any(candidate in actual_city_list[i] for candidate in candidates):
+                # Diagnostics go to the module logger rather than stdout.
+                logger.debug(f"Expected city {expected}; Actual city {actual_city_list[i]}")
+                return 0.
+
except:
- return False
- return True if len(wrong_list) == 0 else False
\ No newline at end of file
+ return 0.
+
+ return 1.
diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py
index 5dc2a48..c67ec09 100644
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -65,7 +65,16 @@ def is_expected_installed_extensions(installed_extensions, expected) -> float:
print("installed_extensions: ")
print(installed_extensions)
expected_extensions = expected["expected"]
- return 1 if expected_extensions == installed_extensions else 0. # must equal, no additional extensions allowed
+
+ # whether the expected extensions are installed
+ set_expected_extensions = set(expected_extensions)
+ set_installed_extensions = set(installed_extensions)
+
+ if set_expected_extensions.issubset(set_installed_extensions):
+ return 1.
+ else:
+ return 0.
+
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""
diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py
index 706f3ba..9d2c878 100644
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -223,17 +223,13 @@ def compare_docx_images(docx_file1, docx_file2):
return 1
-import pytesseract
-
-
+import easyocr
def compare_image_text(image_path, rule):
- if not image_path:
- return 0
-
- img = Image.open(image_path)
- img_text = pytesseract.image_to_string(img)
+    if not image_path:  # result file was not retrieved: score 0 rather than fail inside easyocr
+        return 0
+    extracted_text = ' '.join(entry[1] for entry in easyocr.Reader(['en']).readtext(image_path))
if rule['type'] == 'text':
- return 1 if rule['text'] in img_text else 0
+ return 1 if rule['text'] in extracted_text else 0
else:
raise ValueError("Unsupported rule type")
diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py
index 9d3ce5f..f06623a 100644
--- a/desktop_env/evaluators/metrics/vscode.py
+++ b/desktop_env/evaluators/metrics/vscode.py
@@ -104,6 +104,27 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
return 0.0
import zipfile
+from difflib import SequenceMatcher
+import PyPDF2
+
+def compare_pdf_content(content1, content2, text_similarity_threshold):
+    """Return True when the text of two in-memory PDFs is similar enough.
+
+    content1/content2 are raw PDF bytes; text_similarity_threshold is the
+    minimum difflib.SequenceMatcher ratio (0.0-1.0) that counts as a match.
+    """
+    from io import BytesIO  # local import: only this helper needs it
+
+    def extract_text_from_pdf(content):
+        # Parse from memory: a fixed "temp.pdf" in the working directory is
+        # never removed and can be clobbered by concurrent evaluations.
+        pdf_reader = PyPDF2.PdfReader(BytesIO(content))
+        # `or ''` guards against a page that yields no text.
+        return ''.join(page.extract_text() or '' for page in pdf_reader.pages)
+
+    text1 = extract_text_from_pdf(content1)
+    text2 = extract_text_from_pdf(content2)
+    return SequenceMatcher(None, text1, text2).ratio() >= text_similarity_threshold
def compare_zip_files(actual: str, expected: str, **options) -> float:
"""
@@ -128,7 +149,12 @@ def compare_zip_files(actual: str, expected: str, **options) -> float:
content1 = zip_file1.read(file_name)
content2 = zip_file2.read(file_name)
- if content1 != content2:
+ if file_name.lower().endswith('.pdf'):
+ if compare_pdf_content(content1, content2, 0.95):
+ continue
+ else:
+ return 0.0
+ elif content1 != content2:
return 0.0
return 1.0
diff --git a/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json b/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json
index 7dfc5f0..504a5d8 100644
--- a/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json
+++ b/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json
@@ -54,7 +54,7 @@
"type": "rule",
"rules": {
"type": "text",
- "text": "$ ls\n"
+ "text": " Ls"
}
}
}
diff --git a/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json b/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json
index 1b22b30..c2e5084 100644
--- a/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json
+++ b/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json
@@ -37,7 +37,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Ee1vNyG7gGpLKK2VlLfj6PxcmdkMdvqK&export=download&authuser=0&confirm=t&uuid=1f441c5d-b62d-4850-870f-8e8f113a4091&at=APZUnTWEvKSSkuGBWzen0S9L7aHP:1709727474803",
- "dest": "pic.jpg"
+ "dest": "pic_Gold.jpg"
},
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json
index 3c98c88..bd7c785 100644
--- a/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json
+++ b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json
@@ -47,7 +47,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1g2Trt9oxQyW_sx8aIztFA0zNsE4yNw2x&export=download&authuser=0&confirm=t&uuid=342751c4-54f1-4760-9326-e7388845ded0&at=APZUnTV5BcbaxIZrDglWbs84Oxln:1709623697315",
- "dest": "res.txt"
+ "dest": "res_Gold.txt"
},
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json
index 3b09d5d..be93d29 100644
--- a/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json
+++ b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=11kWQc1XFEqcIMuW0-NnZRSdv1199OmVI&export=download&authuser=0&confirm=t&uuid=694676fd-1ac9-4501-8acf-f48018494c7f&at=APZUnTV-koL51ka5dHum_HpGywv_:1709618406292",
- "dest": "image.docx"
+ "dest": "image_Gold.docx"
}
}
}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
index b778489..2cc3cdb 100644
--- a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
+++ b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
@@ -25,6 +25,12 @@
"path": "/home/user/Desktop/Robotic_Workshop_Infographics.pptx"
}
},
+ {
+ "type": "sleep",
+ "parameters": {
+ "seconds": 3
+ }
+ },
{
"type": "launch",
"parameters": {
diff --git a/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json
index ced78c2..a6396d7 100644
--- a/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json
+++ b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json
@@ -1,7 +1,7 @@
{
"id": "4c26e3f3-3a14-4d86-b44a-d3cedebbb487",
"snapshot": "libreoffice_impress",
- "instruction": "I've noticed that the image on the second slide is too dim. Can you please enhance its brightness for me? Save the adjusted image on the Desktop and name it \"background\". Thank you!",
+ "instruction": "I've noticed that the image on the second slide is too dim. Can you please enhance its brightness for me? Save the adjusted image on the Desktop and name it \"background.png\". Thank you!",
"source": "https://www.quora.com/How-do-I-edit-a-photo-in-GIMP",
"config": [
{
diff --git a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
index 9d754dd..eb77ab5 100644
--- a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
+++ b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
@@ -23,15 +23,6 @@
]
}
},
- {
- "type": "chrome_open_tabs",
- "parameters": {
- "urls_to_open": [
- "https://arxiv.org/abs/2005.14165",
- "https://wallhaven.cc/"
- ]
- }
- },
{
"type": "download",
"parameters": {
@@ -105,7 +96,7 @@
"147",
"372",
"Deep learning",
- "https://creativecoding.soe.ucsc.edu/courses/cs523/slides/week3/DeepLearning_LeCun.pdf"
+ "https://hal.science/hal-04206682/document"
]
},
"options": {
diff --git a/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json b/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json
index b115c1f..7f30c25 100644
--- a/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json
+++ b/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json
@@ -61,12 +61,12 @@
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1kAp7ulAR_h4snny212yg1xyR1cMy3H2Q&export=download&authuser=0&confirm=t&uuid=3f6cb74c-63cc-4653-9083-00626ef2fc11&at=APZUnTWuXvVM2w1Q9h0hOsuX6thn:1709789680904",
- "dest": "paper01.pdf"
+ "dest": "paper01_Gold.pdf"
},
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1UMimItX51tzNXkIMGPpHOdPNF5Dx0Tpy&export=download&authuser=0&confirm=t&uuid=97b668a6-2d0d-4389-ac5e-234e931b4328&at=APZUnTVvuvbAE8r7jpK8AkzGUzyw:1709790384938",
- "dest": "ans.docx"
+ "dest": "ans_Gold.docx"
}
],
"result": [
diff --git a/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json b/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
index 4792da5..414aab4 100644
--- a/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
+++ b/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
@@ -51,7 +51,7 @@
"command": [
"python",
"-c",
- "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
+ "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);"
]
}
}
@@ -60,7 +60,7 @@
"expected": {
"type": "rule",
"rules":{
- "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing","Montreal","San Diego","Lille","Montreal","San Juan","New York","Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm","Montréal","New Orleans","Long Beach","Vancouver"]
+ "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"]
}
},
"result": {
diff --git a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
index 7070663..e564020 100644
--- a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
+++ b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
@@ -1,7 +1,7 @@
{
"id": "81c425f5-78f3-4771-afd6-3d2973825947",
"snapshot": "libreoffice_calc",
- "instruction": "Can you assist me in transferring the data from LibreOffice Calc in this file to a LibreOffice Writer table while preserving the original format as in calc file? Save the document as \"price.docx\" on the desktop.",
+ "instruction": "Can you assist me in transferring the data from LibreOffice Calc in the current sheet to a LibreOffice Writer table while preserving the original format as in calc file? Save the document as \"price.docx\" on the desktop.",
"source": "authors",
"config": [
{
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1KbdlQC0qSAYewG8QnZgParnSwv3s3dub&export=download&authuser=0&confirm=t&uuid=15dcc25c-8168-425e-96e1-fd27e0d6904b&at=APZUnTVho4ZrREHf9DC4rKwdIi3R:1709557117932",
- "dest": "price.docx"
+ "dest": "price_Gold.docx"
}
}
}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json b/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
index 2d9f175..f1cb660 100644
--- a/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
+++ b/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
@@ -56,7 +56,7 @@
"expected": {
"type": "rule",
"rules":{
- "expected": ["Zoom Chrome Extension", "Speechify Text to Speech Voice Reader", "React Developer Tools", "Momentum", "Google Translate", "Web Store", "Chromium PDF Viewer", "Google Hangouts"]
+ "expected": ["Zoom Chrome Extension", "Speechify Text to Speech Voice Reader", "React Developer Tools", "Momentum", "Google Translate"]
}
},
"result": {
diff --git a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json
index 3f33693..18452c6 100644
--- a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json
+++ b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json
@@ -36,7 +36,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1IKRu-dMFP4Aqzq5-4TOmOWVci0qvC27K&export=download&authuser=0&confirm=t&uuid=e2dabad2-5648-4bc3-a40f-f008089cd613&at=APZUnTVh5JD5nT3EvutwHIaSnJAT:1709633945616",
- "dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip"
+ "dest": "Recruitment_and_retention_of_health_professionals_across_Europe_Gold.zip"
},
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json b/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
index 58c7ca7..89f5a21 100644
--- a/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
+++ b/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
@@ -30,7 +30,7 @@
],
"trajectory": "trajectories/",
"related_apps": [
- "os", "vlc"
+ "os", "vlc", "ubuntu_media_player"
],
"evaluator": {
"postconfig":[
diff --git a/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json b/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json
index 2e80af8..35d70cf 100644
--- a/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json
+++ b/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json
@@ -59,7 +59,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1r2KJv0H3foo1WlWnArxdXnaew-yixNqL&export=download&authuser=0&confirm=t&uuid=633cc27c-d38b-4c45-907d-025341b4af1c&at=APZUnTV8AW5F_aLVooprdfgt-Q-Z:1709547335200",
- "dest": "notes.docx"
+ "dest": "notes_Gold.docx"
},
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json
index 11b5b9a..cba0a02 100644
--- a/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json
+++ b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json
@@ -1,7 +1,7 @@
{
"id": "b337d106-053f-4d37-8da0-7f9c4043a66b",
"snapshot": "os",
- "instruction": "Recently, I've been exploring the use of the Vim editor for code editing. However, the default settings don't display relative line numbers. Please search the internet for a tutorial on enabling relative line numbers and setting it as the default for my local Vim.",
+ "instruction": "Recently, I've been exploring the use of the Vim editor for code editing. However, the default settings don't display line numbers in Vim editor. Please search the internet for a tutorial on adding line numbers in Vim and setting it as default for my local Vim.",
"source": "authors",
"config": [
{
diff --git a/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json b/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json
index cad0642..dcc4baf 100644
--- a/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json
+++ b/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json
@@ -54,7 +54,7 @@
}
}
],
- "func": "compare_docx_tables",
+ "func": "compare_docx_files",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/script.docx",
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1-Sol1W8S7Ybj-3KBJJarbcYUqS5wAQ1C&export=download&authuser=0&confirm=t&uuid=d967f546-b8f7-4ac2-b8fc-b1635f1cfbc4&at=APZUnTUazmbS2X3BSXDEQtJgobgf:1709559012053",
- "dest": "script.docx"
+ "dest": "script_Gold.docx"
}
}
}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json b/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json
index 582a9a6..70e8be3 100644
--- a/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json
+++ b/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json
@@ -7,7 +7,11 @@
{
"type": "execute",
"parameters": {
- "command": ["mkdir", "-p", "/home/user/Documents/Departments/finance"]
+ "command": [
+ "mkdir",
+ "-p",
+ "/home/user/Documents/Departments/finance"
+ ]
}
},
{
@@ -29,37 +33,52 @@
]
}
},
- {
- "type": "execute",
- "parameters": {
- "command": [
- "tar",
- "-xzv",
- "--recursive-unlink",
- "-f",
- "/home/user/thunderbird-profile.tar.gz",
- "-C",
- "/home/user/"
- ]
- }
- },
- {
- "type": "launch",
- "parameters": {
- "command": "/usr/bin/thunderbird -compose \"from='Anonym Tester ',subject='Reminder of Payment',body='$(cat /home/user/.payment-reminder-mail-body.html)'\"",
- "shell": true
- }
- },
+ {
+ "type": "execute",
+ "parameters": {
+ "command": [
+ "tar",
+ "-xzv",
+ "--recursive-unlink",
+ "-f",
+ "/home/user/thunderbird-profile.tar.gz",
+ "-C",
+ "/home/user/"
+ ]
+ }
+ },
{
"type": "launch",
"parameters": {
- "command": ["nautilus", "/home/user/Documents/Departments/finance"]
+ "command": "/usr/bin/thunderbird -compose \"from='Anonym Tester ',subject='Reminder of Payment',body='$(cat /home/user/.payment-reminder-mail-body.html)'\"",
+ "shell": true
+ }
+ },
+ {
+ "type": "launch",
+ "parameters": {
+ "command": [
+ "nautilus",
+ "/home/user/Documents/Departments/finance"
+ ]
}
}
],
"trajectory": "trajectories/f5c13cdd-205c-4719-a562-348ae5cd1d91",
- "related_apps": ["thunderbird", "os", "libreoffice_calc"],
+ "related_apps": [
+ "thunderbird",
+ "os",
+ "libreoffice_calc"
+ ],
"evaluator": {
+ "postconfig": [
+ {
+ "type": "sleep",
+ "parameters": {
+ "seconds": 10
+ }
+ }
+ ],
"func": "check_accessibility_tree",
"result": {
"type": "accessibility_tree"
@@ -67,11 +86,27 @@
"expected": {
"type": "rule",
"rules": [
- {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"fox@someuniversity.edu\"]"]},
- {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"iron@someuniversity.edu\"]"]},
- {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"nancy@someuniversity.edu\"]"]},
- {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"stella@someuniversity.edu\"]"]}
+ {
+ "selectors": [
+ "tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"fox@someuniversity.edu\"]"
+ ]
+ },
+ {
+ "selectors": [
+ "tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"iron@someuniversity.edu\"]"
+ ]
+ },
+ {
+ "selectors": [
+ "tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"nancy@someuniversity.edu\"]"
+ ]
+ },
+ {
+ "selectors": [
+ "tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"stella@someuniversity.edu\"]"
+ ]
+ }
]
}
}
-}
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json b/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json
index 0a7673f..c45ecbb 100644
--- a/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json
+++ b/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json
@@ -26,7 +26,7 @@
}
}
],
- "trajectory": "trajectories/",
+ "trajectory": "trajectories/f918266a-b3e0-4914-865d-4faa564f1aef",
"related_apps": [
"vscode",
"os"
diff --git a/requirements.txt b/requirements.txt
index f6e2da6..2c595b9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -44,6 +44,8 @@ dashscope
google-generativeai
PyYaml
mutagen
-pytesseract
+easyocr
borb
+pypdf2
pdfplumber
+