Merge branch 'main' of github.com:xlang-ai/DesktopEnv

This commit is contained in:
Jason Lee
2024-03-10 14:50:17 +08:00
27 changed files with 173 additions and 100 deletions

View File

@@ -1,5 +1,8 @@
# DesktopEnv: An Environment towards Human-like Computer Task Mastery
# OSWorld: Open-Ended Tasks in Real Computer Environments
<p align="center">
<img src="desktop_env/assets/icon.jpg" alt="Logo" width="80px">
<br>
<b>SLOGAN</b>
</p>
@@ -8,7 +11,7 @@
<a href="">Paper</a>
</p>
![Overview](media/overview.png)
![Overview]()
## Updates
- 2024-03-01: We released our [paper](), [environment code](), [dataset](), and [project page](). Check it out!

BIN
desktop_env/assets/icon.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.7 KiB

View File

@@ -82,34 +82,28 @@ def get_info_from_website(env, config: Dict[Any, Any]) -> Any:
page.wait_for_load_state('load')
action = info_dict.get('action', 'inner_text')
if action == "inner_text":
ele = page.locator(info_dict['selector'])
expect(ele).to_be_visible()
ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == "attribute":
ele = page.locator(info_dict['selector'])
expect(ele).to_be_visible()
ele = page.wait_for_selector(info_dict['selector'], state='attached', timeout=10000)
infos.append(ele.get_attribute(info_dict['attribute']))
elif action == 'click_and_inner_text':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
link = page.locator(sel)
expect(link).to_be_visible()
link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
ele = page.locator(sel)
expect(ele).to_be_visible()
ele = page.wait_for_selector(sel, state='attached', timeout=10000)
infos.append(ele.inner_text())
elif action == 'click_and_attribute':
for idx, sel in enumerate(info_dict['selector']):
if idx != len(info_dict['selector']) - 1:
link = page.locator(sel)
expect(link).to_be_visible()
link = page.wait_for_selector(sel, state='attached', timeout=10000)
link.click()
page.wait_for_load_state('load')
else:
ele = page.locator(sel)
expect(ele).to_be_visible()
ele = page.wait_for_selector(sel, state='attached')
infos.append(ele.get_attribute(info_dict['attribute']))
else:
raise NotImplementedError(f'The action {action} is not supported yet.')

View File

@@ -96,16 +96,13 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
paths: List[str] = config["path"]
dests: List[str] = config["dest"]
cache_paths: List[str] = []
gives: Set[int] = set(config.get("gives", [0]))
for i, (p, d) in enumerate(zip(paths, dests)):
print("env cache_dir: ")
print(env.cache_dir)
_path = os.path.join(env.cache_dir, d)
print("_path: ")
print(_path)
file = env.controller.get_file(p)
if file is None:
#return None

View File

@@ -1,8 +1,13 @@
import logging
from typing import List
import openpyxl
def compare_conference_city_in_order( actual_city_list_path, expected_city):
logger = logging.getLogger("desktopenv.metrics.calc")
def compare_conference_city_in_order(actual_city_list_path, expected_city):
expected_city_list = expected_city["expected"]
print(f"Reading csv file from {actual_city_list_path}")
wb = openpyxl.load_workbook(actual_city_list_path)
sheet = wb.active
actual_city_list = []
@@ -12,16 +17,25 @@ def compare_conference_city_in_order( actual_city_list_path, expected_city):
# expected_city is the city that we want to compare with the actual city list
# must in order index
# debug
print("expected_city_list:")
print(expected_city_list)
print("actual_city_list_path:")
print(actual_city_list)
wrong_list = []
try:
for i in range(len(actual_city_list)):
if expected_city_list[i] not in actual_city_list[i]:
wrong_list.append(i)
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
if isinstance(expected_city_list[i], str):
if expected_city_list[i] not in actual_city_list[i]:
logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
return 0.
elif isinstance(expected_city_list[i], List):
if not any(possible_str in actual_city_list[i] for possible_str in expected_city_list[i]):
logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
return 0.
else:
raise TypeError("Expected city should be a string or a list of strings")
except:
return False
return True if len(wrong_list) == 0 else False
return 0.
return 1.

View File

@@ -65,7 +65,16 @@ def is_expected_installed_extensions(installed_extensions, expected) -> float:
print("installed_extensions: ")
print(installed_extensions)
expected_extensions = expected["expected"]
return 1 if expected_extensions == installed_extensions else 0. # must equal, no additional extensions allowed
# whether the expected extensions are installed
set_expected_extensions = set(expected_extensions)
set_installed_extensions = set(installed_extensions)
if set_expected_extensions.issubset(set_installed_extensions):
return 1.
else:
return 0.
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""

View File

@@ -223,17 +223,13 @@ def compare_docx_images(docx_file1, docx_file2):
return 1
import pytesseract
import easyocr
def compare_image_text(image_path, rule):
    """Check whether the text recognised in an image contains an expected string.

    Args:
        image_path: Path of the image to run OCR on. A falsy value (``None``
            or ``""``) short-circuits to a score of 0.
        rule: Mapping with a ``'type'`` key; only ``'text'`` is supported, in
            which case ``rule['text']`` is the substring to look for.

    Returns:
        1 if ``rule['text']`` occurs in the OCR output, otherwise 0.

    Raises:
        ValueError: If ``rule['type']`` is anything other than ``'text'``.
    """
    if not image_path:
        return 0

    # Reject unsupported rules up front so no OCR work is done for a call
    # that can only end in an error.
    if rule['type'] != 'text':
        raise ValueError("Unsupported rule type")

    # Single OCR back-end: the earlier pytesseract pass is dropped so the
    # verdict is based on the easyocr result rather than a stale variable.
    reader = easyocr.Reader(['en'])
    result = reader.readtext(image_path)
    # Index 1 of every readtext entry is used as the recognised string.
    extracted_text = ' '.join(entry[1] for entry in result)
    return 1 if rule['text'] in extracted_text else 0

View File

@@ -104,6 +104,27 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
return 0.0
import zipfile
from difflib import SequenceMatcher
import PyPDF2
def compare_pdf_content(content1, content2, text_similarity_threshold):
    """Decide whether two PDF documents carry (nearly) the same text.

    Args:
        content1: Raw bytes of the first PDF.
        content2: Raw bytes of the second PDF.
        text_similarity_threshold: Minimum ``SequenceMatcher`` ratio at which
            the two extracted texts are considered a match.

    Returns:
        True when the similarity ratio of the extracted texts is greater than
        or equal to ``text_similarity_threshold``, otherwise False.
    """
    import io  # local import: only this helper needs it

    def extract_text_from_pdf(content):
        # Parse straight from memory instead of writing a fixed "temp.pdf"
        # into the working directory: nothing is left behind on disk and
        # concurrent comparisons cannot overwrite each other's file.
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
        # `or ''` tolerates a page for which extract_text() yields nothing.
        return ''.join(page.extract_text() or '' for page in pdf_reader.pages)

    text1 = extract_text_from_pdf(content1)
    text2 = extract_text_from_pdf(content2)

    similarity_ratio = SequenceMatcher(None, text1, text2).ratio()
    return similarity_ratio >= text_similarity_threshold
def compare_zip_files(actual: str, expected: str, **options) -> float:
"""
@@ -128,7 +149,12 @@ def compare_zip_files(actual: str, expected: str, **options) -> float:
content1 = zip_file1.read(file_name)
content2 = zip_file2.read(file_name)
if content1 != content2:
if file_name.lower().endswith('.pdf'):
if compare_pdf_content(content1, content2, 0.95):
continue
else:
return 0.0
elif content1 != content2:
return 0.0
return 1.0

View File

@@ -54,7 +54,7 @@
"type": "rule",
"rules": {
"type": "text",
"text": "$ ls\n"
"text": " Ls"
}
}
}

View File

@@ -37,7 +37,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Ee1vNyG7gGpLKK2VlLfj6PxcmdkMdvqK&export=download&authuser=0&confirm=t&uuid=1f441c5d-b62d-4850-870f-8e8f113a4091&at=APZUnTWEvKSSkuGBWzen0S9L7aHP:1709727474803",
"dest": "pic.jpg"
"dest": "pic_Gold.jpg"
},
"result": {
"type": "vm_file",

View File

@@ -47,7 +47,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1g2Trt9oxQyW_sx8aIztFA0zNsE4yNw2x&export=download&authuser=0&confirm=t&uuid=342751c4-54f1-4760-9326-e7388845ded0&at=APZUnTV5BcbaxIZrDglWbs84Oxln:1709623697315",
"dest": "res.txt"
"dest": "res_Gold.txt"
},
"result": {
"type": "vm_file",

View File

@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=11kWQc1XFEqcIMuW0-NnZRSdv1199OmVI&export=download&authuser=0&confirm=t&uuid=694676fd-1ac9-4501-8acf-f48018494c7f&at=APZUnTV-koL51ka5dHum_HpGywv_:1709618406292",
"dest": "image.docx"
"dest": "image_Gold.docx"
}
}
}

View File

@@ -25,6 +25,12 @@
"path": "/home/user/Desktop/Robotic_Workshop_Infographics.pptx"
}
},
{
"type": "sleep",
"parameters": {
"seconds": 3
}
},
{
"type": "launch",
"parameters": {

View File

@@ -1,7 +1,7 @@
{
"id": "4c26e3f3-3a14-4d86-b44a-d3cedebbb487",
"snapshot": "libreoffice_impress",
"instruction": "I've noticed that the image on the second slide is too dim. Can you please enhance its brightness for me? Save the adjusted image on the Desktop and name it \"background\". Thank you!",
"instruction": "I've noticed that the image on the second slide is too dim. Can you please enhance its brightness for me? Save the adjusted image on the Desktop and name it \"background.png\". Thank you!",
"source": "https://www.quora.com/How-do-I-edit-a-photo-in-GIMP",
"config": [
{

View File

@@ -23,15 +23,6 @@
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://arxiv.org/abs/2005.14165",
"https://wallhaven.cc/"
]
}
},
{
"type": "download",
"parameters": {
@@ -105,7 +96,7 @@
"147",
"372",
"Deep learning",
"https://creativecoding.soe.ucsc.edu/courses/cs523/slides/week3/DeepLearning_LeCun.pdf"
"https://hal.science/hal-04206682/document"
]
},
"options": {

View File

@@ -61,12 +61,12 @@
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1kAp7ulAR_h4snny212yg1xyR1cMy3H2Q&export=download&authuser=0&confirm=t&uuid=3f6cb74c-63cc-4653-9083-00626ef2fc11&at=APZUnTWuXvVM2w1Q9h0hOsuX6thn:1709789680904",
"dest": "paper01.pdf"
"dest": "paper01_Gold.pdf"
},
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1UMimItX51tzNXkIMGPpHOdPNF5Dx0Tpy&export=download&authuser=0&confirm=t&uuid=97b668a6-2d0d-4389-ac5e-234e931b4328&at=APZUnTVvuvbAE8r7jpK8AkzGUzyw:1709790384938",
"dest": "ans.docx"
"dest": "ans_Gold.docx"
}
],
"result": [

View File

@@ -51,7 +51,7 @@
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");"
"import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5);"
]
}
}
@@ -60,7 +60,7 @@
"expected": {
"type": "rule",
"rules":{
"expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing","Montreal","San Diego","Lille","Montreal","San Juan","New York","Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm","Montréal","New Orleans","Long Beach","Vancouver"]
"expected": ["Scottsdale","Atlanta","Lake Tahoe","Banff","Beijing",["Montreal", "Montréal"],"San Diego","Lille",["Montreal", "Montréal"],"San Juan",["New York", "New York City", "NYC"],"Barcelona","Toulon","Sydney","Long Beach","Vancouver","Stockholm",["Montreal", "Montréal"],"New Orleans","Long Beach","Vancouver"]
}
},
"result": {

View File

@@ -1,7 +1,7 @@
{
"id": "81c425f5-78f3-4771-afd6-3d2973825947",
"snapshot": "libreoffice_calc",
"instruction": "Can you assist me in transferring the data from LibreOffice Calc in this file to a LibreOffice Writer table while preserving the original format as in calc file? Save the document as \"price.docx\" on the desktop.",
"instruction": "Can you assist me in transferring the data from LibreOffice Calc in the current sheet to a LibreOffice Writer table while preserving the original format as in calc file? Save the document as \"price.docx\" on the desktop.",
"source": "authors",
"config": [
{
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1KbdlQC0qSAYewG8QnZgParnSwv3s3dub&export=download&authuser=0&confirm=t&uuid=15dcc25c-8168-425e-96e1-fd27e0d6904b&at=APZUnTVho4ZrREHf9DC4rKwdIi3R:1709557117932",
"dest": "price.docx"
"dest": "price_Gold.docx"
}
}
}

View File

@@ -56,7 +56,7 @@
"expected": {
"type": "rule",
"rules":{
"expected": ["Zoom Chrome Extension", "Speechify Text to Speech Voice Reader", "React Developer Tools", "Momentum", "Google Translate", "Web Store", "Chromium PDF Viewer", "Google Hangouts"]
"expected": ["Zoom Chrome Extension", "Speechify Text to Speech Voice Reader", "React Developer Tools", "Momentum", "Google Translate"]
}
},
"result": {

View File

@@ -36,7 +36,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1IKRu-dMFP4Aqzq5-4TOmOWVci0qvC27K&export=download&authuser=0&confirm=t&uuid=e2dabad2-5648-4bc3-a40f-f008089cd613&at=APZUnTVh5JD5nT3EvutwHIaSnJAT:1709633945616",
"dest": "Recruitment_and_retention_of_health_professionals_across_Europe.zip"
"dest": "Recruitment_and_retention_of_health_professionals_across_Europe_Gold.zip"
},
"result": {
"type": "vm_file",

View File

@@ -30,7 +30,7 @@
],
"trajectory": "trajectories/",
"related_apps": [
"os", "vlc"
"os", "vlc", "ubuntu_media_player"
],
"evaluator": {
"postconfig":[

View File

@@ -59,7 +59,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1r2KJv0H3foo1WlWnArxdXnaew-yixNqL&export=download&authuser=0&confirm=t&uuid=633cc27c-d38b-4c45-907d-025341b4af1c&at=APZUnTV8AW5F_aLVooprdfgt-Q-Z:1709547335200",
"dest": "notes.docx"
"dest": "notes_Gold.docx"
},
"result": {
"type": "vm_file",

View File

@@ -1,7 +1,7 @@
{
"id": "b337d106-053f-4d37-8da0-7f9c4043a66b",
"snapshot": "os",
"instruction": "Recently, I've been exploring the use of the Vim editor for code editing. However, the default settings don't display relative line numbers. Please search the internet for a tutorial on enabling relative line numbers and setting it as the default for my local Vim.",
"instruction": "Recently, I've been exploring the use of the Vim editor for code editing. However, the default settings don't display line numbers in Vim editor. Please search the internet for a tutorial on adding line numbers in Vim and setting it as default for my local Vim.",
"source": "authors",
"config": [
{

View File

@@ -54,7 +54,7 @@
}
}
],
"func": "compare_docx_tables",
"func": "compare_docx_files",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/script.docx",
@@ -63,7 +63,7 @@
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1-Sol1W8S7Ybj-3KBJJarbcYUqS5wAQ1C&export=download&authuser=0&confirm=t&uuid=d967f546-b8f7-4ac2-b8fc-b1635f1cfbc4&at=APZUnTUazmbS2X3BSXDEQtJgobgf:1709559012053",
"dest": "script.docx"
"dest": "script_Gold.docx"
}
}
}

View File

@@ -7,7 +7,11 @@
{
"type": "execute",
"parameters": {
"command": ["mkdir", "-p", "/home/user/Documents/Departments/finance"]
"command": [
"mkdir",
"-p",
"/home/user/Documents/Departments/finance"
]
}
},
{
@@ -29,37 +33,52 @@
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"-xzv",
"--recursive-unlink",
"-f",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/user/"
]
}
},
{
"type": "launch",
"parameters": {
"command": "/usr/bin/thunderbird -compose \"from='Anonym Tester <anonym-x2024@outlook.com>',subject='Reminder of Payment',body='$(cat /home/user/.payment-reminder-mail-body.html)'\"",
"shell": true
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"-xzv",
"--recursive-unlink",
"-f",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/user/"
]
}
},
{
"type": "launch",
"parameters": {
"command": ["nautilus", "/home/user/Documents/Departments/finance"]
"command": "/usr/bin/thunderbird -compose \"from='Anonym Tester <anonym-x2024@outlook.com>',subject='Reminder of Payment',body='$(cat /home/user/.payment-reminder-mail-body.html)'\"",
"shell": true
}
},
{
"type": "launch",
"parameters": {
"command": [
"nautilus",
"/home/user/Documents/Departments/finance"
]
}
}
],
"trajectory": "trajectories/f5c13cdd-205c-4719-a562-348ae5cd1d91",
"related_apps": ["thunderbird", "os", "libreoffice_calc"],
"related_apps": [
"thunderbird",
"os",
"libreoffice_calc"
],
"evaluator": {
"postconfig": [
{
"type": "sleep",
"parameters": {
"seconds": 10
}
}
],
"func": "check_accessibility_tree",
"result": {
"type": "accessibility_tree"
@@ -67,11 +86,27 @@
"expected": {
"type": "rule",
"rules": [
{"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"fox@someuniversity.edu\"]"]},
{"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"iron@someuniversity.edu\"]"]},
{"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"nancy@someuniversity.edu\"]"]},
{"selectors": ["tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"stella@someuniversity.edu\"]"]}
{
"selectors": [
"tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"fox@someuniversity.edu\"]"
]
},
{
"selectors": [
"tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"iron@someuniversity.edu\"]"
]
},
{
"selectors": [
"tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"nancy@someuniversity.edu\"]"
]
},
{
"selectors": [
"tool-bar[attr|id=MsgHeadersToolbar] label[name=To]~[attr|class=\"address-pill\"]>label[attr|class=\"pill-label\"][name*=\"stella@someuniversity.edu\"]"
]
}
]
}
}
}
}

View File

@@ -26,7 +26,7 @@
}
}
],
"trajectory": "trajectories/",
"trajectory": "trajectories/f918266a-b3e0-4914-865d-4faa564f1aef",
"related_apps": [
"vscode",
"os"

View File

@@ -44,6 +44,8 @@ dashscope
google-generativeai
PyYaml
mutagen
pytesseract
easyocr
borb
pypdf2
pdfplumber