From f4ec36bdfb2baf22927977998a74d132d64a8066 Mon Sep 17 00:00:00 2001
From: tsuky_chen <3107760494@qq.com>
Date: Sat, 9 Mar 2024 18:48:17 +0800
Subject: [PATCH 1/9] fix multi apps
---
desktop_env/evaluators/metrics/vscode.py | 28 +++++-
.../02ce9a50-7af2-47ed-8596-af0c230501f8.json | 2 +-
.../09a37c51-e625-49f4-a514-20a773797a8a.json | 2 +-
.../20236825-b5df-46e7-89bf-62e1d640a897.json | 2 +-
.../227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json | 2 +-
.../4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json | 2 +-
.../68a25bd4-59c7-4f4d-975e-da0c8509c848.json | 4 +-
.../81c425f5-78f3-4771-afd6-3d2973825947.json | 2 +-
.../8df7e444-8e06-4f93-8a1a-c5c974269d82.json | 2 +-
.../aad10cd7-9337-4b62-b704-a857848cedf2.json | 2 +-
.../b337d106-053f-4d37-8da0-7f9c4043a66b.json | 2 +-
.../bb83cab4-e5c7-42c7-a67b-e46068032b86.json | 4 +-
.../f5c13cdd-205c-4719-a562-348ae5cd1d91.json | 93 +++++++++++++------
.../f918266a-b3e0-4914-865d-4faa564f1aef.json | 2 +-
requirements.txt | 3 +-
15 files changed, 107 insertions(+), 45 deletions(-)
diff --git a/desktop_env/evaluators/metrics/vscode.py b/desktop_env/evaluators/metrics/vscode.py
index e37d04c..ed99ff6 100644
--- a/desktop_env/evaluators/metrics/vscode.py
+++ b/desktop_env/evaluators/metrics/vscode.py
@@ -104,6 +104,27 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
return 0.0
import zipfile
+from difflib import SequenceMatcher
+import PyPDF2
+
+def compare_pdf_content(content1, content2, text_similarity_threshold):
+ def extract_text_from_pdf(content):
+ with open("temp.pdf", "wb") as temp_pdf:
+ temp_pdf.write(content)
+ with open("temp.pdf", "rb") as temp_pdf:
+ pdf_reader = PyPDF2.PdfReader(temp_pdf)
+ text = ''
+ for page_num in range(len(pdf_reader.pages)):
+ page = pdf_reader.pages[page_num]
+ text += page.extract_text()
+ return text
+
+ text1 = extract_text_from_pdf(content1)
+ text2 = extract_text_from_pdf(content2)
+
+ similarity_ratio = SequenceMatcher(None, text1, text2).ratio()
+
+ return similarity_ratio >= text_similarity_threshold
def compare_zip_files(actual: str, expected: str, **options) -> float:
"""
@@ -128,7 +149,12 @@ def compare_zip_files(actual: str, expected: str, **options) -> float:
content1 = zip_file1.read(file_name)
content2 = zip_file2.read(file_name)
- if content1 != content2:
+ if file_name.lower().endswith('.pdf'):
+ if compare_pdf_content(content1, content2, 0.95):
+ continue
+ else:
+ return 0.0
+ elif content1 != content2:
return 0.0
return 1.0
diff --git a/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json b/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json
index 7dfc5f0..504a5d8 100644
--- a/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json
+++ b/evaluation_examples/examples/multi_apps/02ce9a50-7af2-47ed-8596-af0c230501f8.json
@@ -54,7 +54,7 @@
"type": "rule",
"rules": {
"type": "text",
- "text": "$ ls\n"
+ "text": " Ls"
}
}
}
diff --git a/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json b/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json
index 1b22b30..c2e5084 100644
--- a/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json
+++ b/evaluation_examples/examples/multi_apps/09a37c51-e625-49f4-a514-20a773797a8a.json
@@ -37,7 +37,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Ee1vNyG7gGpLKK2VlLfj6PxcmdkMdvqK&export=download&authuser=0&confirm=t&uuid=1f441c5d-b62d-4850-870f-8e8f113a4091&at=APZUnTWEvKSSkuGBWzen0S9L7aHP:1709727474803",
- "dest": "pic.jpg"
+ "dest": "pic_Gold.jpg"
},
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json
index 3c98c88..bd7c785 100644
--- a/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json
+++ b/evaluation_examples/examples/multi_apps/20236825-b5df-46e7-89bf-62e1d640a897.json
@@ -47,7 +47,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1g2Trt9oxQyW_sx8aIztFA0zNsE4yNw2x&export=download&authuser=0&confirm=t&uuid=342751c4-54f1-4760-9326-e7388845ded0&at=APZUnTV5BcbaxIZrDglWbs84Oxln:1709623697315",
- "dest": "res.txt"
+ "dest": "res_Gold.txt"
},
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json
index 3b09d5d..be93d29 100644
--- a/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json
+++ b/evaluation_examples/examples/multi_apps/227d2f97-562b-4ccb-ae47-a5ec9e142fbb.json
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=11kWQc1XFEqcIMuW0-NnZRSdv1199OmVI&export=download&authuser=0&confirm=t&uuid=694676fd-1ac9-4501-8acf-f48018494c7f&at=APZUnTV-koL51ka5dHum_HpGywv_:1709618406292",
- "dest": "image.docx"
+ "dest": "image_Gold.docx"
}
}
}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json
index ced78c2..a6396d7 100644
--- a/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json
+++ b/evaluation_examples/examples/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487.json
@@ -1,7 +1,7 @@
{
"id": "4c26e3f3-3a14-4d86-b44a-d3cedebbb487",
"snapshot": "libreoffice_impress",
- "instruction": "I've noticed that the image on the second slide is too dim. Can you please enhance its brightness for me? Save the adjusted image on the Desktop and name it \"background\". Thank you!",
+ "instruction": "I've noticed that the image on the second slide is too dim. Can you please enhance its brightness for me? Save the adjusted image on the Desktop and name it \"background.png\". Thank you!",
"source": "https://www.quora.com/How-do-I-edit-a-photo-in-GIMP",
"config": [
{
diff --git a/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json b/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json
index b115c1f..7f30c25 100644
--- a/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json
+++ b/evaluation_examples/examples/multi_apps/68a25bd4-59c7-4f4d-975e-da0c8509c848.json
@@ -61,12 +61,12 @@
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1kAp7ulAR_h4snny212yg1xyR1cMy3H2Q&export=download&authuser=0&confirm=t&uuid=3f6cb74c-63cc-4653-9083-00626ef2fc11&at=APZUnTWuXvVM2w1Q9h0hOsuX6thn:1709789680904",
- "dest": "paper01.pdf"
+ "dest": "paper01_Gold.pdf"
},
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1UMimItX51tzNXkIMGPpHOdPNF5Dx0Tpy&export=download&authuser=0&confirm=t&uuid=97b668a6-2d0d-4389-ac5e-234e931b4328&at=APZUnTVvuvbAE8r7jpK8AkzGUzyw:1709790384938",
- "dest": "ans.docx"
+ "dest": "ans_Gold.docx"
}
],
"result": [
diff --git a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
index 7070663..762fcc3 100644
--- a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
+++ b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1KbdlQC0qSAYewG8QnZgParnSwv3s3dub&export=download&authuser=0&confirm=t&uuid=15dcc25c-8168-425e-96e1-fd27e0d6904b&at=APZUnTVho4ZrREHf9DC4rKwdIi3R:1709557117932",
- "dest": "price.docx"
+ "dest": "price_Gold.docx"
}
}
}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json
index 3f33693..18452c6 100644
--- a/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json
+++ b/evaluation_examples/examples/multi_apps/8df7e444-8e06-4f93-8a1a-c5c974269d82.json
@@ -36,7 +36,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1IKRu-dMFP4Aqzq5-4TOmOWVci0qvC27K&export=download&authuser=0&confirm=t&uuid=e2dabad2-5648-4bc3-a40f-f008089cd613&at=APZUnTVh5JD5nT3EvutwHIaSnJAT:1709633945616",
- "dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip"
+ "dest": "Recruitment_and_retention_of_health_professionals_across_Europe_Gold.zip"
},
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json b/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json
index 2e80af8..35d70cf 100644
--- a/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json
+++ b/evaluation_examples/examples/multi_apps/aad10cd7-9337-4b62-b704-a857848cedf2.json
@@ -59,7 +59,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1r2KJv0H3foo1WlWnArxdXnaew-yixNqL&export=download&authuser=0&confirm=t&uuid=633cc27c-d38b-4c45-907d-025341b4af1c&at=APZUnTV8AW5F_aLVooprdfgt-Q-Z:1709547335200",
- "dest": "notes.docx"
+ "dest": "notes_Gold.docx"
},
"result": {
"type": "vm_file",
diff --git a/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json
index 11b5b9a..cba0a02 100644
--- a/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json
+++ b/evaluation_examples/examples/multi_apps/b337d106-053f-4d37-8da0-7f9c4043a66b.json
@@ -1,7 +1,7 @@
{
"id": "b337d106-053f-4d37-8da0-7f9c4043a66b",
"snapshot": "os",
- "instruction": "Recently, I've been exploring the use of the Vim editor for code editing. However, the default settings don't display relative line numbers. Please search the internet for a tutorial on enabling relative line numbers and setting it as the default for my local Vim.",
+ "instruction": "Recently, I've been exploring the use of the Vim editor for code editing. However, the default settings don't display line numbers in Vim editor. Please search the internet for a tutorial on adding line numbers in Vim and setting it as default for my local Vim.",
"source": "authors",
"config": [
{
diff --git a/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json b/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json
index cad0642..dcc4baf 100644
--- a/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json
+++ b/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json
@@ -54,7 +54,7 @@
}
}
],
- "func": "compare_docx_tables",
+ "func": "compare_docx_files",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/script.docx",
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1-Sol1W8S7Ybj-3KBJJarbcYUqS5wAQ1C&export=download&authuser=0&confirm=t&uuid=d967f546-b8f7-4ac2-b8fc-b1635f1cfbc4&at=APZUnTUazmbS2X3BSXDEQtJgobgf:1709559012053",
- "dest": "script.docx"
+ "dest": "script_Gold.docx"
}
}
}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json b/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json
index 582a9a6..70e8be3 100644
--- a/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json
+++ b/evaluation_examples/examples/multi_apps/f5c13cdd-205c-4719-a562-348ae5cd1d91.json
@@ -7,7 +7,11 @@
{
"type": "execute",
"parameters": {
- "command": ["mkdir", "-p", "/home/user/Documents/Departments/finance"]
+ "command": [
+ "mkdir",
+ "-p",
+ "/home/user/Documents/Departments/finance"
+ ]
}
},
{
@@ -29,37 +33,52 @@
]
}
},
- {
- "type": "execute",
- "parameters": {
- "command": [
- "tar",
- "-xzv",
- "--recursive-unlink",
- "-f",
- "/home/user/thunderbird-profile.tar.gz",
- "-C",
- "/home/user/"
- ]
- }
- },
- {
- "type": "launch",
- "parameters": {
- "command": "/usr/bin/thunderbird -compose \"from='Anonym Tester ',subject='Reminder of Payment',body='$(cat /home/user/.payment-reminder-mail-body.html)'\"",
- "shell": true
- }
- },
+ {
+ "type": "execute",
+ "parameters": {
+ "command": [
+ "tar",
+ "-xzv",
+ "--recursive-unlink",
+ "-f",
+ "/home/user/thunderbird-profile.tar.gz",
+ "-C",
+ "/home/user/"
+ ]
+ }
+ },
{
"type": "launch",
"parameters": {
- "command": ["nautilus", "/home/user/Documents/Departments/finance"]
+ "command": "/usr/bin/thunderbird -compose \"from='Anonym Tester ',subject='Reminder of Payment',body='$(cat /home/user/.payment-reminder-mail-body.html)'\"",
+ "shell": true
+ }
+ },
+ {
+ "type": "launch",
+ "parameters": {
+ "command": [
+ "nautilus",
+ "/home/user/Documents/Departments/finance"
+ ]
}
}
],
"trajectory": "trajectories/f5c13cdd-205c-4719-a562-348ae5cd1d91",
- "related_apps": ["thunderbird", "os", "libreoffice_calc"],
+ "related_apps": [
+ "thunderbird",
+ "os",
+ "libreoffice_calc"
+ ],
"evaluator": {
+ "postconfig": [
+ {
+ "type": "sleep",
+ "parameters": {
+ "seconds": 10
+ }
+ }
+ ],
"func": "check_accessibility_tree",
"result": {
"type": "accessibility_tree"
@@ -67,11 +86,27 @@
"expected": {
"type": "rule",
"rules": [
- {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"fox@someuniversity.edu\"]"]},
- {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"iron@someuniversity.edu\"]"]},
- {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"nancy@someuniversity.edu\"]"]},
- {"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"stella@someuniversity.edu\"]"]}
+ {
+ "selectors": [
+ "tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"fox@someuniversity.edu\"]"
+ ]
+ },
+ {
+ "selectors": [
+ "tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"iron@someuniversity.edu\"]"
+ ]
+ },
+ {
+ "selectors": [
+ "tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"nancy@someuniversity.edu\"]"
+ ]
+ },
+ {
+ "selectors": [
+ "tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"stella@someuniversity.edu\"]"
+ ]
+ }
]
}
}
-}
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json b/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json
index 0a7673f..c45ecbb 100644
--- a/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json
+++ b/evaluation_examples/examples/multi_apps/f918266a-b3e0-4914-865d-4faa564f1aef.json
@@ -26,7 +26,7 @@
}
}
],
- "trajectory": "trajectories/",
+ "trajectory": "trajectories/f918266a-b3e0-4914-865d-4faa564f1aef",
"related_apps": [
"vscode",
"os"
diff --git a/requirements.txt b/requirements.txt
index fdcb95d..5d6a87e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -44,5 +44,6 @@ dashscope
google-generativeai
PyYaml
mutagen
-pytesseract
+easyocr
borb
+pypdf2
From 5b07ec17bfe507f06015319c653bf96f3737732c Mon Sep 17 00:00:00 2001
From: tsuky_chen <3107760494@qq.com>
Date: Sat, 9 Mar 2024 18:50:16 +0800
Subject: [PATCH 2/9] fix multi apps
---
desktop_env/evaluators/metrics/docs.py | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py
index e588e74..0787926 100644
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -200,14 +200,13 @@ def compare_docx_images(docx_file1, docx_file2):
return 1
-import pytesseract
-
-
+import easyocr
def compare_image_text(image_path, rule):
- img = Image.open(image_path)
- img_text = pytesseract.image_to_string(img)
+ reader = easyocr.Reader(['en'])
+ result = reader.readtext(image_path)
+ extracted_text = ' '.join([entry[1] for entry in result])
if rule['type'] == 'text':
- return 1 if rule['text'] in img_text else 0
+ return 1 if rule['text'] in extracted_text else 0
else:
raise ValueError("Unsupported rule type")
From b0607c4f7909bb3e446dca94a7613ff708a3b502 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 9 Mar 2024 19:32:05 +0800
Subject: [PATCH 3/9] Fix bugs introduced by Xiaochuan xs
---
desktop_env/evaluators/getters/file.py | 17 ++++++++---------
1 file changed, 8 insertions(+), 9 deletions(-)
diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py
index 39394d0..4b5428a 100644
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -87,15 +87,14 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
paths: List[str] = [config["path"]]
dests: List[str] = [config["dest"]]
print(config)
- if "time_suffix" in config.keys() and config["time_suffix"]:
- if "time_format" in config.keys():
- time_format = config["time_format"]
- # Insert time before . in file type suffix
- paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
- dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
- else:
- paths: List[str] = config["path"]
- dests: List[str] = config["dest"]
+
+ if "time_suffix" in config.keys() and config["time_suffix"]:
+ if "time_format" in config.keys():
+ time_format = config["time_format"]
+ # Insert time before . in file type suffix
+ paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
+ dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
+
print(paths)
print(dests)
From 8d1264b627a111943dc0601b64d5315963941ae4 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 9 Mar 2024 19:58:28 +0800
Subject: [PATCH 4/9] Update README
---
README.md | 4 ++--
desktop_env/assets/icon.jpg | Bin 0 -> 8943 bytes
2 files changed, 2 insertions(+), 2 deletions(-)
create mode 100644 desktop_env/assets/icon.jpg
diff --git a/README.md b/README.md
index 71b5302..3e476fc 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# DesktopEnv: An Environment towards Human-like Computer Task Mastery
+# OSWorld: Real Computer Environments with Open-Ended Tasks for Building Generalist Agents
SLOGAN
@@ -8,7 +8,7 @@
Paper
-
+
## Updates
- 2024-03-01: We released our [paper](), [environment code](), [dataset](), and [project page](). Check it out!
diff --git a/desktop_env/assets/icon.jpg b/desktop_env/assets/icon.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1879c95d566b22cef110aeaaff68fc292464e6c8
GIT binary patch
literal 8943
zcmds+Wl)^Yx90~ZxWiyUh9HAG!GjJ2cTHfh!C`^~0wfR|1`8V8b&%i|Ab4R76%rN<>mZLETbYLD@t_LQ>aF
z&*X)zqm!eYme*@fJAX@iN9aF;V31Q#&{5L~(9;V*pGiK0{+I1f4}csW1Aqa*!e9en
zl4D?zWBlm{JO%(TaQ=GxkHi1NFfg&Oad7eQ9}y7#?a)L9z{J49!ok(?3BXw%(zS2xw^Ofb<+sIJvlaM8(7<
zB&D7zDXXZesY5gj42_IUOwG)pcJ>aAPB3TqD<5A!|JMOw@53V^qoQMyQ$D1orDtSj
z6&4kjl$MoOR5pBSY-(<4ZENrQ(myabG(0joGdnlGu(-60+T7Yk@9ge<-#<7#`*nVC
zd3Akr`w!PYod47QJJ^5XBLB;UiH(hgjrR{12BzO%VUc6wunOT)DC*(acu}$mzr&|`
zmQ+yR`-ojc|0lJr_cQ?wNOY6q^dGc;Bl|xC3;n+$`**Pa#f1V8VPX88JS=j6Jm8#w
ztMKjrcj5+yRyeS5p;J(8u^sVB*6j#capUGXi2Q4wddOouJmxu!mU3r3QzBZ{+TYhqnnbJ=t
zX}x^>vdg1C$^4)Lioox$q9RV^^khWC=VqYdj<01D7u5yiSR@y&%kF9YVa
zlq~g`1^uzNv2K1@A`2?z@D18F3K43*D(1h(8@g4;`lZ7UOxRFsgi?D>59G6chcoT6
z3}2GWU&GhP#rhC*6;^E%fp_ydkDMaFP(Po%M(G{TkoVz|`?*c?Ms9^1BlSECRSBO|
zKD!@##cgE<`KNigyPN(5Tp*!_^Jg{8KOTuJ*6)>x@x6z_^ZognKKddK0J`M8(8-;c
zkrvHbB5mB=JS}Kw{!69R&)#a*JYebkx@R9JwsPR+U~l>b^(VRqqYA^5PqH0tMl(GR
zn<#A3<1Iq%ctg`dY@BV
zj=deDL=#h1Hsn(`8PMzK6%o(C)yheC|8#y7z*QLYFUsVtAkyKjrY56tkOfcyZo$kR
zWT;O6S+8=PKLD9RPkiRi`7RN?1iiQI@yUZL98uXBE3Kj!DbW!M8ZF+!Fa^nQTctIJXk43(g>9D$OM*nu;s1|*vMB@M<06-_72Z7_J
z8c!X!O&Zd4WR$BV$m34)4vnU)v}2WL^QX>l_)i{FHb4I*By&H+__)USr_XzvS~1+{
z1sJD~iEI3HPO;a;iMbp%%XbMq($8!*YPD>iL=(sqY4WHPKV|prhLYtS^-1vZg!MBW>wE=C_L8Tdj&h4u;))j?&JNb#I2w7W&9b+3M8JL!Qs
ze3LLxbumj;(`l*gz50sxW`xk;M$CFc#w3l7Qqzi)IAR%6_IJP(p$ls`@Lx2nwr?`45xQYH_+{OTSa*@@yvIjhA
z4)1yZb}5bXqg+^{AOpD;=YT(
zGK;V++wgPVu`q1O8!$M~i=;W~aoE0AKyLKY6dpeRK-mWtV(v7_h}ubmRewwH5~h$E
z(#Ytxn=Vb7Tj~DX+S(8KHS$B4_Ry?uzvWc=sVnH>@PWF`(W$vS*zN3bC)(TjqUuLF
zgZJYd`FI$;GgqI_O6v6F3M~Dq0pH%vbkZ337zqv@Qf{98MxpVA%a4Xv=o)SOL6hFz
zgfMr#DRWQm6Bn5f4ZgP>)06!VEZ<_pm!dTc){@sJMSrBFDMgb>eTW-0NK|0&g27PF
zz08YWl7hQWI-NBcoSLhhec#g&;L%Z+1N7eYP-mC8ksEFXnN3*xYmBFyjrB*-y$UwU<7km>)BTljd5C;_IVlULOvluA{=mDv!=>bD>@SOWfPdrc)z0M
zorRm@RsNzp{E?Boh@DrW*W$i)yo;lUqkZ$1*z@M6jx9}I+T5B-%_5yXQVAHb(HO!n2%#QqMT8_clzH;+$#eR6%M9k3h?+fLCuyNFcp?aR!Y+%zXU(_{q@387PoB
zWBj@OE0`Wv6%%o`_FExveMHc~nwq62+kGmJrnq_tHY%9ouR
zMm>HYF%WBan(OtOJKbb#VkOv!z07if?_!}hO_C|0HY)v@V+I8v
z_T^SX6P-hhQI@aPubt6a9GwED$~hAqtl0e;qIwB(ifh!D)@@X|wPn}Zm$5&9e#lUX
zo}Mvi_WUqjHH2r)JET*w=ia5}(@&N&wG|F#_t$563G>b;kq&jUuQDv(L+n&+MZS8C
zRLx$$E6B5{Ps|^*TYD=TXd+rEQ9n!X+E%=;3-j&cQsl$}ooUOH2KZJ;q`C$${#4wH
zqmLJJLMwdFG??rDgA3?FdO
zu`%9IGM`xp(%$j#ShVFH)AJVpc7@v<#^UxCsc=`AKj#n_B!1xklUdSgk-Zf==GXOw
z5OwREzOI-Q{#eF{6yxbklg2J_d2Wh|<9Cn1CrMb&3b75O8TeS&2ubFp8iM4|7_NNG
ze-}?~EyLGhYc)Ct-|)Wh18ou^1)|OcURUH+Q(K8
zv{mA6^S2DVcrqqZV(50?JdV_X#wjgc<;SAgb?i)cCMIZ(lee1@7UA#A*F<Jv#&-JcIfw`;s-T6#1oO|>A7?pR%$IhZszQROvPx2m
z>%bt22)7MEPfxFy?>@3dDbEex8A;fvt^+KYQwN+s6}b@0sEx(@X4h(mrb(A}rAe|!
zUw*=Pf*Wkmd+T#(dBM&tOERiW9=}{s6ntgmGGZ2)XeJbQJS?-@?!O0tJj#_-fez63
zQVh`bmG@A4GbAzM2>QXp%)OTxKjn!coosOXccLA{G!A^Q
z_D>cnRQs*bei9b`07|pV;r6t`<{5W##uNLaK{YYPBeD6R6f&tntNO&M)vVC7EiS(C
z_yQbKN=Ye-L~s1_f=}xG35K0_Ru>Jj3FKl8Wi%3XV0%D(^o2Sv`2XB+0Rn(@3%DuNJt
z;&AF5GduvAgp`SgkJPXZF^ro8?E%`ZO0<^SE%G->o8f+hvFZ*GRsAB#c^Viqz4j?b
z%;n>yL^CQUYt=$BnRXZ*j7{eJUJf{GGl#l?+K#8}Fw*s2U{Sx@{7q>*UF5Q~!=U~|
zQ$O<6TVLk)m$!=$4pL{ATfkKh&B8_
zocB^E6axd19a<#^(IBuM5iy1HkDnDZN~1vXsDiwsQ{yHx|BQZ@c)gnyd)vb6+|h(wX9=j=|p
z!^*&|>8`1>f0Lgb=pc*P(j=lQajO)RW@ONj>f@R$>kWnu1d}^yTJI;N#btLAJYt_q
z@NnL%ZmL>gRBf=KjqgyqHjd7eOi^}QR3WYdMjS0$cb=vgH)`U(Wyq8>_Ig`XIvYy-
zR62*u?Pa9|4U?zrHPvt+{VNIu$wO1P^P>0<
z8h^X^s9~afJ@H~@na1)ZG(Z)WG&;`hV5_Yyw20f_aU#C$H1fbKmkMl8wj)SXmw06>
zc{^wDoX@aQP()(gW|AG1sllFwMlFll)O)epHC!yIJ^usH7|&0Ja8^`#!OY)JS0|ZG
zeE-6l`Up3Fo-K#k8SJQ;F)*fG(TPihKRn6SMF~lsT2`aZUy72SPaCn;G1pq+PY|lf
zVICZX5~T~vT_)l@$E=1=omxF<`<=wwr@8cIS7oJDU3ERc(}fiB^Rkj(LPmAj>#EqA
zAvZMkisp{N@2Nv>+K(AU3a&vjb|B*d&P1%2pBICvc?Y|o)>q(CHyV*zBM|8$;lS;O
zS@B)a|2LRcr>lEPV3=hvB-$+a=j7*^*h>zu`-}FUyDd-~a1v$Vc2s6&`z2k?IHwh2
zly0-F@s0-v*so&t5nz|_#;0`EzrHk7%57+Q5zqJjaf$v5A}g1-)?cNXY{e@Z+*iAl
z9A!yDG;(uqf`Vo(3{OF};}1QA!+PzZ_}g*D)DdAJTfnW(xph6~(G{JK>5RF8@ON1?
z@ce@;t9vm0&8fw3EyLTY)}PuKQZ*efXftLEyTjj?dri=K%;~uZyz*OI7v;N`xwTVl
zT)7+czfpJic%^(xhNqEn9l{8q5l+7l+hfah3L$_Dsgcif;TjDwaoFQZOtfxv=Zqsp9VP(!0B_)z9u0k
z<7!rq8&{-%Cg3}s!&S|Z+^HBLo%@!tVX7ZsO@)Tuk9S&yDuPN+1wcKpc5VF!pb3Q+
zT59pm$e&R9>!jCO#x1^pODrF)d+lx_TxFryXZq6>^Vb-q98_u~wQ{S9O!J
zL<+YKs&z?rru@cEV-1ZD*~zn|^Ala{)V$J86QJny>o&Tm9H5FM7na-3K5g|yIu0ml
zq`ZkK-O#mk;QBzfk*n>skhxi?F2!{rOwN0A`m@v-ct;`z@MqH$vV@
zeC#&apo$I_lfYd2GMHb$$9D64+b~XE9M>=T{tJZ||7@)kt=B(R!*7YaPFwuX%m*$6b#bqBE)S<3F(tP~^^~f&ji1%x2!eD;#!&Joz
z4B+Z@*M8WkCUe`J!YfX5!ouN)W0}1tTP5}e!_$y5h>o)*&HuOa%~2Zx_YZgl+{gP(
zhd*6*(9(x&(!N@yO*j2rP@YhNp!=74;_FkB7bzQ^uZ;_uSAr@%^|$&Hdw?zgeH|N>aXEi2y%+PUOczmLMp;oMEo8j!nu!QP>x2)^}cv3t#4LkdJOp4V$#&LP)I#yal$C)jUg#JQWTn(O_Qz
z+WzJOdyKN)n=b&7l^FNSE9K-D17etdn*L!>PQ>n9QQaakq#;>3J$k8f*teAfBKOo#Jfi>
z0iMn6YAydE!_q?D*gpG}V`2-#ESe$E=iY_ApBWenF{^#qscghr)WQSlW%1!$Z)^;q
zo|H{dyo7PKsYt)cmjnPv1)t^Lgc>_A)xYH_P1pi|Wn9wQ
zX|+6Z7?J!&0zWd@jQImFMELomW?bWdDYcsh-S0|RAsj7hfbKTNm3-$ay
zE6oCTM@jXP`c*yW6WplhsrL09et~7C!*P79#MxCKC3fnZ*ISR66)(L~*>k>K+#lw;
zgs)bHjX{@RSDgRmIn;06<=l?XVA-lrVVOS`KE5bcMZngE@i3w#x4ItpZJhBf{Q)e>
zA6g!N+yr7*TZ`Z^isY<5_$-;Oe6*0v=WER$^opAsNyj!VjNu_9cqWWLA-nICPvUJw
zX_R*Q;ZOqN{2GUWNVY1N4%!l>LcqgzZbZj;=BzQg$cyCXvovpGxX#4bRbhJf19r@Z
z2pNQH_yh)q2;NvL`aD1V>??9nDMvj$Q3Chj-NOL!jOTq66WfbyqE`Rnp0h~Gz{UAh
z_T84rq+gRwipE1#|&6r2r0Ph)gyv4i&qeLI3MDfXwGJ~R$f6Eb%rviedF#7
z1!^F?y?ks$)e1yC7f9`G)wx%L@oZ`xben}b9Y>IS7Zc;e%17NQDeyr|G(rTG&K6Lk
z*xV=$boCGzi+;_5;EXFcvshgE?LzUZ=alb5=)xJ4qbH)i}&N1|Atg+nm#^hoc7@BDGctO23I>7#mF)b~#Vz~!V7B0E`${Yl-R%CTUQ=}YdcrOu3-k7G6|s~1QEi`nhzT9AubkrN@D4Ac-n
zlviW<+OB_AeZS^j?6N<`%Id2QDWF(^eB^m1OR{|3imH`Hr0E>>@PX_h
zsphFoGKL#BspqQd4OU7I-2h*IKzF^6G1Q3#gqtj=oOyo#**vWz$P7}GEIiQX4>u58
znmEQ_O;b8c7M8-IE|HHa?M&n-cDq|;V0xy8QZ{=in0-4yrzO4j8=4W(G*ga9NnR8s=N4^W`X(;bdX5}Y>
zcmRPePPPx)F3)~8j(@`){BhzzH*r8iU?`
z-(lC7z7kiSD!lQo^$|wU9x28}z4GsuEeC%2YBK4%!ol5*iAwgHzeQD;m0#vmYMcFC
zgYYjn>q=%X{7>wJAVkfdE=*Hns`JmJabCD}_bTYo&7R-Z-e
z9=^&OeTIzCjVF2Hv1QJ>LwwPuR6-n^3KD_z+r^=QI-L^AB)E7W->LwIDzPFUU({FP
zR+}tsCR+llfsTf*nF|cD-eo!tcF$#y-FBJAe&Y#wRCShg#F{W^Vl(7T%G}(oLlJJ3
zTZ2}itD8N@?sNoeCO^FoWl`^E<{h$MP~oiuVO^=@LALo-TSJJ$sBM5CF!hk@Lt#Zo
zpLF?)7a?VzKzPzQc0~P`Pud?hn!}8fdJyE~+CAeVLyI-GYu5?mO6bASjXn#y(ze|G
z@A1SEDzlBnoP^THs|^}48hKCzH~$+I55@#$&}cRV;17Tv&Cfp!8vfz^7bz3{A*5*V
z2vN-M+(xUr>$=cA@6OM`-yo{d24H1p-^guB;MgS{#$djHiE6I2Da2JH)D%cUQeV-?
z5xypWBH#Yfo93C~_6oI0U9uFJLr0=&&Yfe-+SD5fjcFwK%;?c?AW1qm0?yob%yxewh4W)4k}y06-tDN+-Ka?{k;EbVk!??8E3s
zD3rf>M^C{~)&C*T|JDfg>8%RdEGldu?uO{+qY|V#qsZxhs~j?VMEgL_Pbg98QJOAI
zf}rQ&(rxxf9ZUzuRbpu+FH=w(7UtG3T{CwGPUFyVVM*sTkI9_=09K6kR&I7NlOJKd
zUz;L7kafxPz@VjbGaLQ`K+6A4mBp!;h0$l5cZFK@Raf?hGL*B|Sg{DQ#D&e#hmKCH
z_bs~}LCO0&!>Mf($E?(pTU<0FA}D5(K44u3g?rIQgu@-Wot1wtDA=4W4smsB$_!
zLIDk_ZEhax8J5^G1~MDKd_*bUAWxs;3O~YLXz-6YwNYP&ra0YAChN@YHa3OeL0C+h
zgMK`12CzSlhvzLPhlTluoFqJS!HPuul>N-ZG`{J8rI;{4+hW+7va7AJTwztvm_yM#
z9CnLG?!_zIuC-rtrTvguD{7x2u;cR)s#xjZkz#a=o+Ox*WEB$3o=Mn(y`zp2nY%Ob
zgB6%sC{$cJWxl=n=q^Fu%@%JbjIE1)*(}ADr@P}l=V}r$x`4Mq-08G5XyoZUKxO#h
epOeLZNo%pcr*90(|K*D7U$49V-?mBrEdMum!iG}-
literal 0
HcmV?d00001
From bec82726bc46e53b913b15c09c7de9d6aa556c2a Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 9 Mar 2024 20:23:55 +0800
Subject: [PATCH 5/9] Update README
---
README.md | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 3e476fc..8eb867f 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
-# OSWorld: Real Computer Environments with Open-Ended Tasks for Building Generalist Agents
+# OSWorld: Open-Ended Tasks in Real Computer Environments
+
+
+
SLOGAN
@@ -8,7 +11,7 @@
Paper
-
+![Overview]()
## Updates
- 2024-03-01: We released our [paper](), [environment code](), [dataset](), and [project page](). Check it out!
From 447c886b0a8c084ef3f0c24bfad2e72a1ae268f8 Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 9 Mar 2024 20:54:52 +0800
Subject: [PATCH 6/9] Fix multiple apps 5990457f-2adb-467b-a4af-5c857c92d762
---
desktop_env/evaluators/getters/chrome.py | 18 ++++++------------
desktop_env/evaluators/getters/file.py | 9 ---------
.../5990457f-2adb-467b-a4af-5c857c92d762.json | 11 +----------
3 files changed, 7 insertions(+), 31 deletions(-)
diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py
index 0304827..13a5385 100644
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -82,34 +82,28 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any:
page.wait_for_load_state('load')
action = info_dict.get('action', 'inner_text')
if action == "inner_text":
- ele = page.locator(info_dict['selector'])
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == "attribute":
- ele = page.locator(info_dict['selector'])
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.get_attribute(info_dict['attribute']))
elif action == 'click_and_inner_text':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
- link = page.locator(sel)
- expect(link).to_be_visible()
+ link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
- ele = page.locator(sel)
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(sel, state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == 'click_and_attribute':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
- link = page.locator(sel)
- expect(link).to_be_visible()
+ link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
- ele = page.locator(sel)
- expect(ele).to_be_visible()
+ ele = page.wait_for_selector(sel, state='attached')
infos.append(ele.get_attribute(info_dict['attribute']))
else:
raise NotImplementedError(f'The action {action} is not supported yet.')
diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py
index 4b5428a..d6e2087 100644
--- a/desktop_env/evaluators/getters/file.py
+++ b/desktop_env/evaluators/getters/file.py
@@ -95,19 +95,12 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
- print(paths)
- print(dests)
-
cache_paths: List[str] = []
gives: Set[int] = set(config.get("gives", [0]))
for i, (p, d) in enumerate(zip(paths, dests)):
- print("env cache_dir: ")
- print(env.cache_dir)
_path = os.path.join(env.cache_dir, d)
- print("_path: ")
- print(_path)
file = env.controller.get_file(p)
if file is None:
#return None
@@ -121,8 +114,6 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
with open(_path, "wb") as f:
f.write(file)
# debug
- print("cache_paths")
- print(cache_paths)
return cache_paths[0] if len(cache_paths)==1 else cache_paths
diff --git a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
index 9d754dd..eb77ab5 100644
--- a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
+++ b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
@@ -23,15 +23,6 @@
]
}
},
- {
- "type": "chrome_open_tabs",
- "parameters": {
- "urls_to_open": [
- "https://arxiv.org/abs/2005.14165",
- "https://wallhaven.cc/"
- ]
- }
- },
{
"type": "download",
"parameters": {
@@ -105,7 +96,7 @@
"147",
"372",
"Deep learning",
- "https://creativecoding.soe.ucsc.edu/courses/cs523/slides/week3/DeepLearning_LeCun.pdf"
+ "https://hal.science/hal-04206682/document"
]
},
"options": {
From b0854e519cf1108192a268048a23dbe5fe2c2ecd Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 9 Mar 2024 21:20:44 +0800
Subject: [PATCH 7/9] Minor fix on instruction of
81c425f5-78f3-4771-afd6-3d2973825947
---
.../multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
index 762fcc3..e564020 100644
--- a/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
+++ b/evaluation_examples/examples/multi_apps/81c425f5-78f3-4771-afd6-3d2973825947.json
@@ -1,7 +1,7 @@
{
"id": "81c425f5-78f3-4771-afd6-3d2973825947",
"snapshot": "libreoffice_calc",
- "instruction": "Can you assist me in transferring the data from LibreOffice Calc in this file to a LibreOffice Writer table while preserving the original format as in calc file? Save the document as \"price.docx\" on the desktop.",
+ "instruction": "Can you assist me in transferring the data from LibreOffice Calc in the current sheet to a LibreOffice Writer table while preserving the original format as in calc file? Save the document as \"price.docx\" on the desktop.",
"source": "authors",
"config": [
{
From e481afcf5cd0b9af35ebb88c1f04f8054158023d Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sat, 9 Mar 2024 23:01:22 +0800
Subject: [PATCH 8/9] Fix multiple examples
---
desktop_env/evaluators/metrics/calc.py | 38 +++++++++++++------
desktop_env/evaluators/metrics/chrome.py | 11 +++++-
.../6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json | 4 +-
.../873cafdd-a581-47f6-8b33-b9696ddb7b05.json | 2 +-
4 files changed, 39 insertions(+), 16 deletions(-)
diff --git a/desktop_env/evaluators/metrics/calc.py b/desktop_env/evaluators/metrics/calc.py
index 701be27..0ff0744 100644
--- a/desktop_env/evaluators/metrics/calc.py
+++ b/desktop_env/evaluators/metrics/calc.py
@@ -1,8 +1,13 @@
+import logging
+from typing import List
+
import openpyxl
-def compare_conference_city_in_order( actual_city_list_path, expected_city):
+logger = logging.getLogger("desktopenv.metrics.calc")
+
+
+def compare_conference_city_in_order(actual_city_list_path, expected_city):
expected_city_list = expected_city["expected"]
- print(f"Reading csv file from {actual_city_list_path}")
wb = openpyxl.load_workbook(actual_city_list_path)
sheet = wb.active
actual_city_list = []
@@ -12,16 +17,25 @@ def compare_conference_city_in_order( actual_city_list_path, expected_city):
# expected_city is the city that we want to compare with the actual city list
# must in order index
# debug
- print("expected_city_list:")
- print(expected_city_list)
- print("actual_city_list_path:")
- print(actual_city_list)
- wrong_list = []
try:
for i in range(len(actual_city_list)):
- if expected_city_list[i] not in actual_city_list[i]:
- wrong_list.append(i)
- print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ if isinstance(expected_city_list[i], str):
+ if expected_city_list[i] not in actual_city_list[i]:
+ logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ return 0.
+
+
+ elif isinstance(expected_city_list[i], List):
+ if not any(possible_str in actual_city_list[i] for possible_str in expected_city_list[i]):
+ logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
+ return 0.
+
+ else:
+ raise TypeError("Expected city should be a string or a list of strings")
+
except:
- return False
- return True if len(wrong_list) == 0 else False
\ No newline at end of file
+ return 0.
+
+ return 1.
diff --git a/desktop_env/evaluators/metrics/chrome.py b/desktop_env/evaluators/metrics/chrome.py
index 5dc2a48..c67ec09 100644
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -65,7 +65,16 @@ def is_expected_installed_extensions(installed_extensions, expected) -> float:
print("installed_extensions: ")
print(installed_extensions)
expected_extensions = expected["expected"]
- return 1 if expected_extensions == installed_extensions else 0. # must equal, no additional extensions allowed
+
+ # pass when every expected extension is installed; additional installed extensions are allowed
+ set_expected_extensions = set(expected_extensions)
+ set_installed_extensions = set(installed_extensions)
+
+ if set_expected_extensions.issubset(set_installed_extensions):
+ return 1.
+ else:
+ return 0.
+
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""
diff --git a/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json b/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
index 4792da5..414aab4 100644
--- a/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
+++ b/evaluation_examples/examples/multi_apps/6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a.json
@@ -51,7 +51,7 @@
"command": [
"python",
"-c",
- "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
+ "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);"
]
}
}
@@ -60,7 +60,7 @@
"expected": {
"type": "rule",
"rules":{
- "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing","Montreal","San Diego","Lille","Montreal","San Juan","New York","Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm","Montréal","New Orleans","Long Beach","Vancouver"]
+ "expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"]
}
},
"result": {
diff --git a/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json b/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
index 2d9f175..f1cb660 100644
--- a/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
+++ b/evaluation_examples/examples/multi_apps/873cafdd-a581-47f6-8b33-b9696ddb7b05.json
@@ -56,7 +56,7 @@
"expected": {
"type": "rule",
"rules":{
- "expected": ["Zoom Chrome Extension", "Speechify Text to Speech Voice Reader", "React Developer Tools", "Momentum", "Google Translate", "Web Store", "Chromium PDF Viewer", "Google Hangouts"]
+ "expected": ["Zoom Chrome Extension", "Speechify Text to Speech Voice Reader", "React Developer Tools", "Momentum", "Google Translate"]
}
},
"result": {
From a12dfacbd72b5f2818d56d46780fa221563bb31c Mon Sep 17 00:00:00 2001
From: Timothyxxx <384084775@qq.com>
Date: Sun, 10 Mar 2024 12:10:05 +0800
Subject: [PATCH 9/9] Add more time for waiting impress in
47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5
---
.../multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json | 6 ++++++
.../multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json | 2 +-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
index b778489..2cc3cdb 100644
--- a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
+++ b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
@@ -25,6 +25,12 @@
"path": "/home/user/Desktop/Robotic_Workshop_Infographics.pptx"
}
},
+ {
+ "type": "sleep",
+ "parameters": {
+ "seconds": 3
+ }
+ },
{
"type": "launch",
"parameters": {
diff --git a/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json b/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
index 58c7ca7..89f5a21 100644
--- a/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
+++ b/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json
@@ -30,7 +30,7 @@
],
"trajectory": "trajectories/",
"related_apps": [
- "os", "vlc"
+ "os", "vlc", "ubuntu_media_player"
],
"evaluator": {
"postconfig":[