Merge branch 'main' into zdy

This commit is contained in:
David Chang
2024-02-01 11:48:33 +08:00
32 changed files with 1315 additions and 152 deletions

View File

@@ -367,7 +367,7 @@ class SetupController:
context = browser.contexts[0]
page = context.new_page() # Create a new page (tab) within the existing context
page.goto(url)
page.goto(url, timeout=60000)
logger.info(f"Opened tab {i + 1}: {url}")
if i == 0:

View File

@@ -84,7 +84,6 @@ class DesktopEnv(gym.Env):
# Initialize emulator and controller
logger.info("Initializing...")
self._config_screen_size()
self._start_emulator()
self.vm_ip = self._get_vm_ip()
self.controller = PythonController(vm_ip=self.vm_ip)
@@ -105,60 +104,6 @@ class DesktopEnv(gym.Env):
self._step_no: int = 0
self.action_history: List[Dict[str, any]] = []
def _config_screen_size(self):
    """
    Rewrite the display settings stored in the VM's .vmx file.

    Every line of ``self.path_to_vm`` that mentions ``svga.autodetect``,
    ``svga.vramSize``, ``displayWidth`` or ``displayHeight`` is dropped, and
    fresh values derived from ``self.vm_screen_size`` are appended at the end
    of the file.

    fixme: experimental feature; may cause unexpected errors if the system
    is corrupted.

    Returns:
        bool: True when the file was rewritten; False when the .vmx file
        does not exist or when reading/writing it raised an exception.
    """
    if not os.path.isfile(self.path_to_vm):
        logger.warning(f"The specified vmx file does not exist: {self.path_to_vm}")
        return False

    width, height = self.vm_screen_size
    # VRAM needed for one frame at 32 bits (4 bytes) per pixel.
    vram_bytes = width * height * (32 // 8)

    # Settings owned by this method; any existing line mentioning one is replaced.
    managed_keys = ("svga.autodetect", "svga.vramSize", "displayWidth", "displayHeight")

    try:
        with open(self.path_to_vm, 'r') as vmx:
            original_rows = vmx.readlines()

        rewritten = [
            row for row in original_rows
            if not any(key in row for key in managed_keys)
        ]
        rewritten.extend([
            'svga.autodetect = "TRUE"\n',
            f'svga.vramSize = "{vram_bytes}"\n',
            f'displayWidth = "{width}"\n',
            f'displayHeight = "{height}"\n',
        ])

        with open(self.path_to_vm, 'w') as vmx:
            vmx.writelines(rewritten)

        logger.info(f"Screen size for {self.path_to_vm} set to {width}x{height} with VRAM size {vram_bytes} bytes")
        return True
    except IOError as e:
        logger.error(f"An IOError occurred: {e}")
        return False
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return False
def _start_emulator(self):
while True:
try:
@@ -280,7 +225,6 @@ class DesktopEnv(gym.Env):
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
time.sleep(5)
self._config_screen_size()
print(self.vm_screen_size)
logger.info("Starting emulator...")
self._start_emulator()
@@ -350,6 +294,7 @@ class DesktopEnv(gym.Env):
self.setup_controller.setup(self.evaluator.get("postconfig", []))
if type(self.metric) == list:
results = []
for idx, metric in enumerate(self.metric):
try:
config = self.evaluator["result"][idx]
@@ -366,11 +311,12 @@ class DesktopEnv(gym.Env):
**self.metric_options[idx]) if expected_state is not None \
else metric(result_state, **self.metric_options[idx])
if self.metric_conj == 'and' and not bool(metric):
if self.metric_conj == 'and' and float(metric) == 0.0:
return 0
elif self.metric_conj == 'or' and bool(metric):
elif self.metric_conj == 'or' and float(metric) == 1.0:
return 1
return 1 if self.metric_conj == 'and' else 0
else: results.append(metric)
return sum(results) / len(results) if self.metric_conj == 'and' else max(results)
else:
try:
result_state = self.result_getter(self, self.evaluator["result"])

View File

@@ -1,5 +1,12 @@
# Setup Instructions
## Overall
Disable the system crash report by:
```
sudo vim /etc/default/apport
```
and then change the `enabled` to `0`.
## LibreOffice
For LibreOffice, open the app first, and then turn on the option that suppresses the pop-up shown when saving with 'Ctrl + S'.
@@ -209,3 +216,5 @@ pip install opencv-python-headless Pillow imagehash
- Ensure VLC is running and the correct port (default is 8080) is being used.
- If the port is in use by another application, you may change the port number in VLC's settings.
## GIMP
Click "Keep" in the pop-up that appears when loading an image.

View File

@@ -490,16 +490,16 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
parent_id = file['id']
file.GetContentFile(_path, mimetype=file['mimeType'])
except:
logger.info('[ERROR]: Failed to download the file from Google Drive')
except Exception as e:
logger.info('[ERROR]: Failed to download the file from Google Drive', e)
return None
return _path
if 'query' in config:
return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
elif 'path' in config:
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1
else f'title = {fp} and trashed = false' for idx, fp in enumerate(config['path'])]
query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1
else f"title = '{fp}' and trashed = false" for idx, fp in enumerate(config['path'])]
return get_single_file(query, os.path.join(env.cache_dir, config['dest']))
elif 'query_list' in config:
_path_list = []
@@ -512,8 +512,8 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
_path_list = []
assert len(config['path_list']) == len(config['dest'])
for idx, path in enumerate(config['path_list']):
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1
else f'title = {fp} and trashed = false' for jdx, fp in enumerate(path)]
query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1
else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)]
dest = config['dest'][idx]
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
return _path_list

View File

@@ -27,6 +27,11 @@ def get_gimp_config_file(env, config: Dict[str, str]):
_path = os.path.join(env.cache_dir, config["dest"])
content = env.controller.get_file(config_path)
if not content:
logger.error("Failed to get GIMP config file.")
return None
with open(_path, "wb") as f:
f.write(content)

View File

@@ -8,6 +8,8 @@ from .chrome import (
is_expected_tabs,
is_expected_bookmarks,
compare_pdfs,
compare_htmls,
compare_archive,
is_cookie_deleted,
is_shortcut_on_desktop,
check_font_size,
@@ -92,7 +94,8 @@ from .table import (
)
from .thunderbird import (
check_thunderbird_prefs,
check_thunderbird_filter
check_thunderbird_filter,
check_thunderbird_folder
)
from .vlc import (
is_vlc_playing,

View File

@@ -1,6 +1,6 @@
import logging, re
from typing import Any, Dict, List
import logging, re, os, shutil
from typing import Any, Dict, List, Union
from bs4 import BeautifulSoup, Tag
import fitz # PyMuPDF
import rapidfuzz.fuzz as fuzz
@@ -14,7 +14,6 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f
Checks if the expected tabs are open in Chrome.
"""
print(open_tabs, rule)
match_type = rule['type']
if match_type == "url":
@@ -53,10 +52,12 @@ def is_expected_search_query(active_tab_info: Dict[str, str], rules: Dict[str, A
return 0.
def compare_pdfs(pdf1_path, pdf2_path):
def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[str]]):
"""
Compare two PDF files.
"""
if type(pdf2_path) != list:
pdf1_path, pdf2_path = [pdf1_path], [pdf2_path]
def extract_text_from_pdf(pdf_path):
"""Extract text from each page of the PDF."""
@@ -65,14 +66,100 @@ def compare_pdfs(pdf1_path, pdf2_path):
for page in pdf:
text += page.get_text()
return text.strip()
try:
text1 = extract_text_from_pdf(pdf1_path)
text2 = extract_text_from_pdf(pdf2_path)
return fuzz.ratio(text1, text2) / 100
except Exception as e:
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return 0.0
score = 0.
for path1, path2 in zip(pdf1_path, pdf2_path):
try:
text1 = extract_text_from_pdf(path1)
text2 = extract_text_from_pdf(path2)
score += fuzz.ratio(text1, text2) / 100
except Exception as e:
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
return score / len(pdf2_path)
def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
    """
    Score a predicted archive against a gold archive, file by file.

    Both archives are unpacked next to themselves (``<stem>_pred`` and
    ``<stem>_gold``). They must contain exactly the same file names, and all
    files are expected to be of one type.

    Args:
        pred_path: path of the predicted archive; a falsy value scores 0.
        gold_path: path of the reference archive.
        **kwargs: ``file_type`` selects the per-file comparator
            (text, pdf, docx, ppt, image, csv, table, audio, video;
            default ``text``). It is removed from ``kwargs`` and the remaining
            options are forwarded to the comparator.

    Returns:
        The mean per-file score, or 0. when the prediction is missing or the
        file listings differ.

    Raises:
        ValueError: if ``file_type`` is not one of the supported values.

    Note: two archives that are both empty reach the final division with a
    file count of zero.
    """
    if not pred_path:
        return 0.

    pred_dir = os.path.splitext(pred_path)[0] + '_pred'
    gold_dir = os.path.splitext(gold_path)[0] + '_gold'

    # Predictions change between runs, so always start from a clean folder.
    if os.path.exists(pred_dir):
        shutil.rmtree(pred_dir, ignore_errors=True)
    os.makedirs(pred_dir)
    shutil.unpack_archive(pred_path, pred_dir)

    # The gold archive is unpacked once and reused as a cache afterwards.
    if not os.path.exists(gold_dir):
        os.makedirs(gold_dir)
        shutil.unpack_archive(gold_path, gold_dir)

    pred_names = sorted(os.listdir(pred_dir))
    gold_names = sorted(os.listdir(gold_dir))
    if pred_names != gold_names:
        return 0.

    def pick_comparator():
        # Comparators live in sibling modules and are imported on demand.
        kind = kwargs.pop('file_type', 'text')
        if kind == 'text':
            from .vscode import compare_text_file
            return compare_text_file
        if kind == 'pdf':
            return compare_pdfs
        if kind == 'docx':
            from .docs import compare_docx_files
            return compare_docx_files
        if kind == 'ppt':
            from .slides import compare_pptx_files
            return compare_pptx_files
        if kind == 'image':
            from .vlc import compare_images
            return compare_images
        if kind == 'csv':
            from .table import compare_csv
            return compare_csv
        if kind == 'table':
            from .table import compare_table
            return compare_table
        if kind == 'audio':
            from .vlc import compare_audios
            return compare_audios
        if kind == 'video':
            from .vlc import compare_videos
            return compare_videos
        raise ValueError('[ERROR]: not support file type: %s' % kind)

    comparator = pick_comparator()
    # The listings are identical here, so one name addresses both folders.
    total = sum(
        comparator(os.path.join(pred_dir, name), os.path.join(gold_dir, name), **kwargs)
        for name in pred_names
    )
    return total / len(pred_names)
def compare_htmls(html_path1: str, html_path2: str) -> float:
    """
    Compare two HTML files node by node.

    Both files are parsed with the ``lxml`` parser and their node trees are
    walked in lockstep. Tags must agree on name, stripped text and
    attributes; any other node (e.g. a text node) must be equal.

    Args:
        html_path1: path of the first HTML file (read as UTF-8).
        html_path2: path of the second HTML file (read as UTF-8).

    Returns:
        1. when every pair of nodes matches and both documents have the same
        number of nodes, otherwise 0.
    """
    from itertools import zip_longest

    with open(html_path1, 'r', encoding='utf-8') as inf:
        soup1 = BeautifulSoup(inf, 'lxml')
    with open(html_path2, 'r', encoding='utf-8') as inf:
        soup2 = BeautifulSoup(inf, 'lxml')

    def compare_elements(elem1, elem2):
        if not (isinstance(elem1, Tag) and isinstance(elem2, Tag)):
            return elem1 == elem2
        if elem1.name != elem2.name:
            return False
        if elem1.text.strip() != elem2.text.strip():
            return False
        if elem1.attrs != elem2.attrs:
            return False
        return True

    # zip() would stop at the shorter traversal and silently ignore extra
    # trailing nodes in the longer document; pad with a sentinel instead so a
    # differing node count counts as a mismatch.
    missing = object()
    for elem1, elem2 in zip_longest(soup1.descendants, soup2.descendants, fillvalue=missing):
        if elem1 is missing or elem2 is missing:
            return .0
        if not compare_elements(elem1, elem2):
            return .0
    return 1.
def is_cookie_deleted(cookie_data, rule):

View File

@@ -12,6 +12,8 @@ def compare_image_list(pred_img_path_list: Union[str, List[str]],
pred_img_path_list = [pred_img_path_list]
gold_img_path_list = [gold_img_path_list]
for pred_img_path, gold_img_path in zip(pred_img_path_list, gold_img_path_list):
if not pred_img_path or not gold_img_path:
return 0.0
pred_img = Image.open(pred_img_path)
gold_img = Image.open(gold_img_path)
diff = ImageChops.difference(pred_img, gold_img)

View File

@@ -190,8 +190,7 @@ def compare_pptx_files(file1_path, file2_path, **options):
if run1.font.underline != run2.font.underline and examine_font_underline:
return 0
if ('strike' in run1.font._element.attrib) != (
'strike' in run2.font._element.attrib) and examine_strike_through:
if run1.font._element.attrib.get('strike', 'noStrike') != run2.font._element.attrib.get('strike', 'noStrike') and examine_strike_through:
return 0
# fixme: Actually there are more properties to be compared, but we cannot get them through pptx
@@ -250,6 +249,14 @@ def evaluate_presentation_fill_to_rgb_distance(pptx_file, rules):
r1, g1, b1 = fill.fore_color.rgb
r2, g2, b2 = _rgb
return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 ** 2 + 255 ** 2 + 255 ** 2)
elif fill.type == 5:
master_fill = _slide.slide_layout.slide_master.background.fill
if master_fill.type == 1:
r1, g1, b1 = master_fill.fore_color.rgb
else:
return 1
r2, g2, b2 = _rgb
return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 ** 2 + 255 ** 2 + 255 ** 2)
return 1
@@ -363,3 +370,7 @@ def check_page_number_colors(pptx_file, rules):
return 0
return 1
if __name__ == '__main__':
print(compare_pptx_files(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx", r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx"))
# print(evaluate_presentation_fill_to_rgb_distance(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\3b27600c-3668-4abd-8f84-7bcdebbccbdb\lec17-gui-events.pptx", {"rgb": (0, 0, 255)}))

View File

@@ -128,6 +128,47 @@ def check_thunderbird_filter(result: str, rules: Dict[str, List[Dict[str, str]]]
unexpect_metric = unexpect_metric and not any(_match_record(r, flt) for r in rules.get("unexpect", []))
return float(all(expect_metrics) and unexpect_metric)
def check_thunderbird_folder(result: Union[str, List[str]], reference: Union[str, List[str]], **kwargs) -> float:
    """
    Check that Thunderbird folder files contain the same messages.

    Each path (or each path of a list) points to a text file holding all
    messages of one Thunderbird folder, each message starting with `FROM - `.
    Messages are normalized, optionally filtered and de-duplicated, sorted,
    and then the predicted and reference files are compared pairwise.

    Args:
        result: predicted file path, or list of paths. A None entry scores 0.
        reference: gold file path, or list of paths (same order as `result`).
        **kwargs:
            ignore_status (bool): for comparison, ignore the status (X-Mozilla-Status: 0000) of each message. default: False
            ignore_keys (bool): for comparison, ignore the keys (X-Mozilla-Keys: label) of each message. default: False
            remove_deleted (bool): ignore deleted messages, which have status code 0008 or 0009. default: True
            remove_duplicate (bool): remove duplicate messages. default: True

    Returns:
        1.0 if every predicted file matches its reference, otherwise 0.0.
    """
    remove_deleted = kwargs.get('remove_deleted', True)
    remove_duplicate = kwargs.get('remove_duplicate', True)

    def normalize_msg(msg, options):
        ignore_status = options.get('ignore_status', False)
        ignore_keys = options.get('ignore_keys', False)
        if ignore_status:
            msg = re.sub(r'X-Mozilla-Status\d?:[\s\d]+', '', msg)
        if ignore_keys:
            msg = re.sub(r'(X-Mozilla-Keys:[^\n]*?)\n(MIME-Version)', r'\2', msg)
        return msg.strip()

    def read_thunderbird_folder_file(path: str) -> str:
        with open(path, 'r') as inf:
            data = inf.read().strip()
        messages = []
        # NOTE(review): the delimiter is matched case-sensitively; confirm it
        # matches the separator line Thunderbird actually writes.
        for mail in data.split('FROM - '):
            # Skip only the EMPTY chunks produced by the split (e.g. the one
            # before the first delimiter). The previous check was inverted and
            # dropped every real message, so any two files compared equal.
            if not mail.strip():
                continue
            if remove_deleted and re.search(r'X-Mozilla-Status: 000[89]', mail):
                continue
            messages.append('FROM - ' + normalize_msg(mail, kwargs))
        if remove_duplicate:
            messages = set(messages)
        # Sorting makes the comparison independent of message order.
        return '\n'.join(sorted(messages))

    if not isinstance(reference, list):
        result, reference = [result], [reference]
    for pred, gold in zip(result, reference):
        if pred is None:
            return .0
        if read_thunderbird_folder_file(pred) != read_thunderbird_folder_file(gold):
            return .0
    return 1.0
if __name__ == "__main__":
#import lxml.etree
#from lxml.cssselect import CSSSelector

View File

@@ -49,6 +49,8 @@ def check_json_settings(actual: str, expected: str, **options) -> float:
Return:
float: the score
"""
if not actual:
return 0.
with open(actual, 'r') as f:
data = json.load(f)

View File

@@ -1,7 +1,7 @@
{
"id": "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
"snapshot": "libreoffice_impress",
"instruction": "Please make the background blue on all my slides.",
"instruction": "Please make the background blue on all my slides. I was stuck by finding the entrance to do that for a while...",
"source": "https://www.libreofficehelp.com/change-slide-background-impress/#All_Slides",
"config": [
{

View File

@@ -40,9 +40,8 @@
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.freerice.com/",
"https://www.hku.hk/",
"https://about.meta.com/technologies/facebook-app/"
"https://news.google.com",
"https://x.com"
]
}
},

View File

@@ -44,9 +44,8 @@
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.freerice.com/",
"https://www.hku.hk/",
"https://about.meta.com/technologies/facebook-app/"
"https://news.google.com",
"https://x.com"
]
}
},

View File

@@ -28,7 +28,6 @@
"parameters": {
"urls_to_open": [
"https://www.apple.com/",
"https://en.sjtu.edu.cn/",
"https://scholar.google.com/"
]
}
@@ -83,7 +82,6 @@
"type": "url",
"urls": [
"https://www.apple.com/",
"https://en.sjtu.edu.cn/",
"https://scholar.google.com/",
"https://www.amazon.com/"
]

View File

@@ -0,0 +1,177 @@
{
"id": "78aed49a-a710-4321-a793-b611a7c5b56b",
"snapshot": "chrome",
"instruction": "Could you help me save all attachments of the oldest email in Bills local folders to the attachment/ folder in Google Drive and then move this email to a different folder \"have_seen\" in Local Folders.",
"source": "https://marketplace.uipath.com/listings/upload-email-attachments-from-gmail-to-google-drive",
"config": [
{
"type": "googledrive",
"parameters": {
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"operation": ["delete"],
"args": [
{
"query": "title = 'attachment' and 'root' in parents and trashed = false",
"trash": false
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://stackoverflow.com/",
"https://paperswithcode.com/"
]
}
},
{
"type": "login",
"parameters": {
"settings_file": "evaluation_examples/settings/google/settings.json",
"platform": "googledrive"
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1W18fZ4Dog-cSH3Fxa3bcfTz28z6AsL_1&export=download&authuser=0&confirm=t&uuid=7c8fc164-6e7b-4849-aba1-f3dba947feba&at=APZUnTWd2DFbIYnlMf-GY4qMqXcc:1706682437093",
"path": "/home/user/thunderbird-profile.tar.gz"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"-xz",
"--recursive-unlink",
"-f",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/user/"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"/usr/bin/thunderbird"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"thunderbird",
"chrome"
],
"evaluator": {
"func": [
"check_thunderbird_folder",
"compare_pdfs"
],
"result": [
{
"type": "vm_file",
"multi": true,
"path": [
"/home/user/.thunderbird/t5q2a5hp.default-release/Mail/Local Folders/have_seen",
"/home/user/.thunderbird/t5q2a5hp.default-release/Mail/Local Folders/Bills"
],
"dest": [
"have_seen",
"Bills"
],
"gives": [
0,
1
]
},
{
"type": "googledrive_file",
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"path_list": [
[
"attachment",
"Invoice-01234567-89ab.pdf"
],
[
"attachment",
"Receipt-0123-4567.pdf"
]
],
"dest": [
"Invoice-01234567-89ab.pdf",
"Receipt-0123-4567.pdf"
]
}
],
"expected": [
{
"type": "cloud_file",
"multi": true,
"path": [
"https://drive.usercontent.google.com/download?id=12ielR0p3jFZ4UDfGlC7fdF88uUTM_3bn&export=download&authuser=0&confirm=t&uuid=6ef47d29-2772-49cd-87e7-17efd587f1ba&at=APZUnTUGjXxDcDcMF5RoAsVtbFOB:1706689562530",
"https://drive.usercontent.google.com/download?id=13XQ4Hr62WyHVwpefjD-tgkpjb0p9HKpe&export=download&authuser=0&confirm=t&uuid=ad4ee033-83e4-4df4-9f9c-ac61d43634bc&at=APZUnTV5ZI1GFXNHTyIblwSKjyku:1706692277803"
],
"dest": [
"have_seen_gold",
"Bills_gold"
],
"gives": [
0,
1
]
},
{
"type": "cloud_file",
"multi": true,
"path": [
"https://drive.usercontent.google.com/download?id=1SlzOPRfzaKyhBERy-ks2-rjzrjtvgoDG&export=download&authuser=0&confirm=t&uuid=e4cb717b-fb5d-4860-99d9-9e1f38df2592&at=APZUnTXlcSimcf9qZ7uZmlAUj-zQ:1706683984205",
"https://drive.usercontent.google.com/download?id=19SdAYymlHvFQ7wzc-_JfFYOYMW6xAw5-&export=download&authuser=0&confirm=t&uuid=e1cbff64-d615-493d-9d02-bcdbd40f878c&at=APZUnTVRpcei6q_BjzQ7EtHA7voq:1706683990388"
],
"dest": [
"Invoice-01234567-89ab_gold.pdf",
"Receipt-0123-4567_gold.pdf"
],
"gives": [
0,
1
]
}
],
"options": [
{
"remove_deleted": true,
"remove_duplicate": true
},
{}
]
}
}

View File

@@ -21,7 +21,7 @@
"type": "launch",
"parameters": {
"command": [
"chromium-browser",
"google-chrome",
"--remote-debugging-port=1337"
]
}
@@ -42,6 +42,7 @@
"urls_to_open": [
"https://www.zhihu.com/",
"https://www.coursera.org/",
"https://www.deepl.com",
"https://www.wikidata.org/wiki/Wikidata:Main_Page"
]
}

View File

@@ -0,0 +1,132 @@
{
"id": "a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb",
"snapshot": "chrome",
"instruction": "Please help me backup my emails in \"Bills\" folder in Thunderbird and store the .eml files with only subject names to my Google Drive folder called \"emails\".",
"source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive",
"config": [
{
"type": "googledrive",
"parameters": {
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"operation": ["delete"],
"args": [
{
"query": "title = 'emails' and 'root' in parents",
"trash": false
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://arxiv.org/",
"https://github.com/ohmyzsh/ohmyzsh/wiki/themes",
"https://releases.ubuntu.com/"
]
}
},
{
"type": "login",
"parameters": {
"settings_file": "evaluation_examples/settings/google/settings.json",
"platform": "googledrive"
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1cATYRGGbZ_vZEjJWajI1Dn4gmp3Won-l&export=download&authuser=0&confirm=t&uuid=156022ae-a56a-400c-a934-34e4369ec82a&at=APZUnTWMnGMhJciITn7IvRY33zuJ:1706707804986",
"path": "/home/user/thunderbird-profile.tar.gz"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"-xz",
"--recursive-unlink",
"-f",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/user/"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"/usr/bin/thunderbird"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"thunderbird",
"chrome"
],
"evaluator": {
"func": "check_thunderbird_folder",
"result": {
"type": "googledrive_file",
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"path_list": [
[
"emails",
"Amazon Web Services Invoice Available [Account: 0123456789ab] [Invoice ID: 0123456789].eml"
],
[
"emails",
"Your receipt from X (formerly Twitter) #0123-4567.eml"
]
],
"dest": [
"pred1.eml",
"pred2.eml"
]
},
"expected": {
"type": "cloud_file",
"multi": "true",
"path": [
"https://drive.usercontent.google.com/download?id=1Fb2ofAjfn-wlIYbtIGXNvLJEB85KbO7Y&export=download&authuser=0&confirm=t&uuid=9a996347-9093-43ed-8ad2-7e769e0576bd&at=APZUnTUp2pvXjM0zGvjc22lewOv3:1706708252395",
"https://drive.usercontent.google.com/download?id=1LZ2PgwmVgO62dNOueWy7RLlJ1_d8Hz3E&export=download&authuser=0&confirm=t&uuid=f632abc2-3ff7-4e9f-a7c6-72c9bbc44654&at=APZUnTXihve7i15GwLyEx2rwDFUk:1706708265408"
],
"dest": [
"gold1.eml",
"gold2.eml"
],
"gives": [
0,
1
]
}
}
}

View File

@@ -40,9 +40,9 @@
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.freerice.com/",
"https://www.hku.hk/",
"https://about.meta.com/technologies/facebook-app/"
"https://news.google.com",
"https://x.com",
"https://www.deepl.com"
]
}
},

View File

@@ -0,0 +1,98 @@
{
"id": "c867c42d-a52d-4a24-8ae3-f75d256b5618",
"snapshot": "thunderbird",
"instruction": "Please assist me in exporting my contacts of Personal Address Book from Thunderbird into contacts.csv file in the desktop and convert it to .xlsx with Libreoffice Calc.",
"source": "https://www.sync.blue/en/sync/mozilla-thunderbird/google-sheets/",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"libreoffice",
"--calc"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1wKXmJ14dnxSzdy9ZF_ePWU7zpevY6Dry&export=download&authuser=0&confirm=t&uuid=9b476c95-8eee-4a9a-8cee-c3620d5ce250&at=APZUnTUzDeeeMNr34DB1vEnBK6N7:1706719624132",
"path": "/home/user/thunderbird-profile.tar.gz"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"--recursive-unlink",
"-xz",
"-f",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/user/"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"/usr/bin/thunderbird"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"thunderbird",
"libreoffice_calc"
],
"evaluator": {
"func": [
"compare_csv",
"compare_table"
],
"conj": "and",
"result": [
{
"type": "vm_file",
"path": "/home/user/Desktop/contacts.csv",
"dest": "contacts.csv"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/contacts.xlsx",
"dest": "contacts.xlsx"
}
],
"expected": [
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1StwASpAR2ALq2Y1vugGsdUJptg6FwjEm&export=download&authuser=0&confirm=t&uuid=56339e19-b889-4da1-ab72-5e0b90f13fff&at=APZUnTVWFF2pBrtWU_hXgzfbrWP2:1706719668676",
"dest": "contacts_gold.csv"
},
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1s25eUpvkMzSm6p_WA7O13t6mVqmkxr2C&export=download&authuser=0&confirm=t&uuid=901cbd32-6026-4391-a5cc-989e1047cf7c&at=APZUnTUs27mZceDshB_f9Tx4PFyz:1706719610831",
"dest": "contacts_gold.xlsx"
}
],
"options": [
{},
{
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RI0",
"sheet_idx1": "EI0"
}
]
}
]
}
}

View File

@@ -0,0 +1,68 @@
{
"id": "d9b7c649-c975-4f53-88f5-940b29c47247",
"snapshot": "thunderbird",
"instruction": "Help me extract the latest 5 emails in daily folder from Thunderbird, from the earliest to the most recent by time, and creates a LibreOffice Calc Report \"report.xlsx\" in the desktop, storing the sender_name, sender_address, subject, CC, and number_of_attachments.",
"source": "https://marketplace.uipath.com/listings/extract-the-first-1000-gmail-emails-from-the-current-month-in-a-new-google-sheets-report",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1aCmZvSIs8Mb2kM_IVyhTbLawFvHOrlW-&export=download&authuser=0&confirm=t&uuid=dee0fe8b-731b-4bb7-97c4-02a2d8154da8&at=APZUnTVO-WjksU7WYUq4sCkNMlsL:1706710969972",
"path": "/home/user/thunderbird-profile.tar.gz"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"tar",
"-xz",
"--recursive-unlink",
"-f",
"/home/user/thunderbird-profile.tar.gz",
"-C",
"/home/user/"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"/usr/bin/thunderbird"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"thunderbird",
"libreoffice_calc"
],
"evaluator": {
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/report.xlsx",
"dest": "report.xlsx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=13L73ukCzuLYUgY11xz2b6DEoYhFvmXVE&export=download&authuser=0&confirm=t&uuid=82e92b1f-d4c0-44e6-b40f-595dff880acd&at=APZUnTXzPvN60uTLjy7QSPEF2Ft9:1706714031096",
"dest": "report_gold.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RI0",
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -0,0 +1,91 @@
{
"id": "e135df7c-7687-4ac0-a5f0-76b74438b53e",
"snapshot": "libreoffice_calc",
"instruction": "Please convert a .xlsx file opened in LibreOffice Calc to a .html file and view it in Chrome.",
"source": "https://www.ilovefreesoftware.com/23/featured/free-csv-to-html-converter-software-windows.html",
"config": [
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://aclanthology.org/",
"https://openai.com/",
"https://www.linkedin.com/home/"
]
}
},
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1zio_nULUdQGFGFBD55aZ52vhqYBtlQeJ&export=download&authuser=0&confirm=t&uuid=68483de0-2035-461d-90d5-e4048825d1ce&at=APZUnTXRMiB4UDzbsqrgm6BbFefE:1706630941497",
"path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.xlsx"
}
]
}
},
{
"type": "open",
"parameters": {
"path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.xlsx"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"thunderbird",
"chrome"
],
"evaluator": {
"func": ["is_expected_tabs", "compare_htmls"],
"result": [
{
"type": "open_tabs_info"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.html",
"dest": "annual-enterprise-survey-2021-financial-year-provisional.html"
}
],
"expected": [
{
"type": "rule",
"rules": {
"type": "url",
"urls": [
"https://aclanthology.org/",
"https://openai.com/",
"https://www.linkedin.com/home/",
"file:///home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.html"
]
}
},
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1HvdTjLZctQGgo3BojmPBiSrPXFkqubJi&export=download&authuser=0&confirm=t&uuid=47f78e2c-bc38-416b-900f-4837ff588bfa&at=APZUnTXSartmo7MSTsanrkc9zHpm:1706680670213",
"dest": "annual-enterprise-survey-2021-financial-year-provisional_gold.html"
}
]
}
}

View File

@@ -0,0 +1,121 @@
{
"id": "f7dfbef3-7697-431c-883a-db8583a4e4f9",
"snapshot": "libreoffice_writer",
"instruction": "Could you convert all `.doc` files in current directory to PDF all at once in the command line?",
"source": "https://www.thegeekdiary.com/libreoffice-command-examples-in-linux/",
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1efUzatD7vixloPY9-XENOL3OZ5__1hod&export=download&authuser=0&confirm=t&uuid=14a3a6b1-a6e2-46cf-9e5b-bcc9e2acbc98&at=APZUnTWtyQUzT6eO5ZTcnmAZkwyX:1706704733958",
"path": "/home/user/Desktop/doc.tar.gz"
}
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"/bin/bash",
"-c",
"tar -zxf /home/user/Desktop/doc.tar.gz -C /home/user/Desktop/ && rm /home/user/Desktop/doc.tar.gz"
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"/bin/bash",
"-c",
"history -c && echo > ~/.bash_history && sleep 3"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"gnome-terminal",
"--maximize",
"--working-directory=/home/user/Desktop"
]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"libreoffice_writer",
"terminal"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"/bin/bash",
"-c",
"cd /home/user/Desktop && tar -zcf pdf.tar.gz *.pdf"
]
}
},
{
"type": "execute",
"parameters": {
"command": [
"/bin/bash",
"-c",
"killall gnome-terminal-server"
]
}
}
],
"func": [
"check_include_exclude",
"compare_archive"
],
"result": [
{
"type": "vm_command_line",
"command": [
"/bin/bash",
"-c",
"output=$(cat ~/.bash_history | grep -E \"(soffice|libreoffice).+--convert-to\\s+pdf.+\\*\\.doc\"); if [ -z \"$output\" ]; then echo \"failed to complete this task\"; else echo \"catch the desired command\"; fi"
]
},
{
"type": "vm_file",
"path": "/home/user/Desktop/pdf.tar.gz",
"dest": "pdf.tar.gz"
}
],
"expected": [
{
"type": "rule",
"rules": {
"include": [
"catch the desired command"
],
"exclude": [
"failed to complete this task"
]
}
},
{
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1A-UoQdQvvtFBQWkPi_Q8ecNe4gjlh0dg&export=download&authuser=0&confirm=t&uuid=0c97e2f6-3de8-438f-a676-557cca5ee292&at=APZUnTUy4pghj5TtS-suVMXrCLO2:1706705033330",
"dest": "pdf_gold.tar.gz"
}
],
"options": [
{},
{
"file_type": "pdf"
}
]
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
"snapshot": "os",
"instruction": "Can you move the file with the path 'todo.txt' to the directory with the path 'done'?",
"instruction": "Can you move the file with the path 'todo.txt' on the Desktop to the directory with the path 'done' on the Desktop?",
"source": "https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files",
"config": [
{

View File

@@ -18,7 +18,10 @@
{
"type": "launch",
"parameters": {
"command": ["code", "/home/user/Desktop/vscode_replace_text.txt"]
"command": [
"code",
"/home/user/Desktop/vscode_replace_text.txt"
]
}
},
{
@@ -33,6 +36,36 @@
"vscode"
],
"evaluator": {
"postconfig": [
{
"type": "activate_window",
"parameters": {
"window_name": "Visual Studio Code"
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
},
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; pyautogui.hotkey('ctrl', 's');"
]
}
},
{
"type": "sleep",
"parameters": {
"seconds": 0.5
}
}
],
"func": "compare_text_file",
"expected": {
"type": "cloud_file",

View File

@@ -1,13 +1 @@
{
"installed": {
"client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com",
"project_id": "xlang-2024-benchmarking",
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
"token_uri": "https://oauth2.googleapis.com/token",
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
"client_secret": "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",
"redirect_uris": [
"http://localhost"
]
}
}
{"installed":{"client_id":"786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com","project_id":"xlang-2024-benchmarking","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}

View File

@@ -1 +1 @@
{"access_token": "ya29.a0AfB_byBdrcgbmWKGyEUOxxuJBhxOs1uu0AqWeLgJKKKLG_dVg4iQKJAdiWD2oetHrKn17p4ZtfX-vt1VQ0BiF2MPD2exX1oESsQkXaO8q4TM1olIpadvlUBqUkqKJcjCqn1dp1oaTVYU-Srf2wQCGnDt3ozjljdkHXN_MQaCgYKAa4SARISFQHGX2MiWnixlrP3Se3vEV73_4fenA0173", "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0e0qXy4xW1Ud5CgYIARAAGA4SNwF-L9IrWfaomed_CK0R7zZffcpT-GIXf3y2ZjqqAD0UP6UkbaMV9F_OEC6pBVaaX4TYnBKx3os", "token_expiry": "2024-01-30T11:20:53Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byBdrcgbmWKGyEUOxxuJBhxOs1uu0AqWeLgJKKKLG_dVg4iQKJAdiWD2oetHrKn17p4ZtfX-vt1VQ0BiF2MPD2exX1oESsQkXaO8q4TM1olIpadvlUBqUkqKJcjCqn1dp1oaTVYU-Srf2wQCGnDt3ozjljdkHXN_MQaCgYKAa4SARISFQHGX2MiWnixlrP3Se3vEV73_4fenA0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
{"access_token": "ya29.a0AfB_byAZmDTDsYds_iatV8a30PUPWcDHVW4Cyg71pTlD0f3eBBwAjV4WpVL8LdAle8sT4j_rX4rWH8iCt3QI2YdrQLFPlaVdBk0zRGGtAEcebIDuQy_VKD6j5c3IGxok9PDON-Mft0ZVJjUVEopgLYA4fYwctbQZ8nyl4AaCgYKAX4SARISFQHGX2Mim-LRNXCfACmecJH94-D09A0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0ehtafHmucszRCgYIARAAGA4SNwF-L9IrpDBsnzdHKAlRfrkvzNFw1cpdnRY8rhM5gy4flsPYdysMav27yHamJx39BBGq-LLw40s", "token_expiry": "2024-01-31T14:41:25Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byAZmDTDsYds_iatV8a30PUPWcDHVW4Cyg71pTlD0f3eBBwAjV4WpVL8LdAle8sT4j_rX4rWH8iCt3QI2YdrQLFPlaVdBk0zRGGtAEcebIDuQy_VKD6j5c3IGxok9PDON-Mft0ZVJjUVEopgLYA4fYwctbQZ8nyl4AaCgYKAX4SARISFQHGX2Mim-LRNXCfACmecJH94-D09A0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}

View File

@@ -1,9 +1,9 @@
import ctypes
import datetime
import json
import logging
import os
import sys
import func_timeout
from desktop_env.envs.desktop_env import DesktopEnv
@@ -46,7 +46,6 @@ logger = logging.getLogger("desktopenv.experiment")
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
env = DesktopEnv(
@@ -123,9 +122,8 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
logger.info("Environment closed.")
def main(example_class, example_id):
def main(example_class, example_id, gpt4_model="gpt-4-0125-preview"):
action_space = "pyautogui"
gpt4_model = "gpt-4-0125-preview"
gemini_model = "gemini-pro-vision"
logger.info("Running example %s/%s", example_class, example_id)
@@ -134,7 +132,7 @@ def main(example_class, example_id):
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f)
example["snapshot"] = "exp_v1"
example["snapshot"] = "exp_v5"
api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], max_tokens=1000,
@@ -154,25 +152,50 @@ def main(example_class, example_id):
if __name__ == '__main__':
os_list = [
"94d95f96-9699-4208-98ba-3c3119edf9c2",
"bedcedc4-4d72-425e-ad62-21960b11fe0d",
"43c2d64c-bab5-4dcb-a30c-b888321c319a",
"7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
"ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3",
"f9be0997-4b7c-45c5-b05c-4612b44a6118",
"28cc3b7e-b194-4bc9-8353-d04c0f4d56d2",
"5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
"e0df059f-28a6-4169-924f-b9623e7184cc",
"ddc75b62-7311-4af8-bfb3-859558542b36",
"b6781586-6346-41cd-935a-a6b1487918fc",
"3ce045a0-877b-42aa-8d2c-b4a863336ab8",
"a4d98375-215b-4a4d-aee9-3d4370fccc41",
"13584542-872b-42d8-b299-866967b5c3ef",
"23393935-50c7-4a86-aeea-2b78fd089c5c"
]
# for example_id in os_list:
# try:
# main("os", example_id, gpt4_model="gpt-3.5-turbo-16k")
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
vlc_list = [
# "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
# "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
# "8f080098-ddb1-424c-b438-4e96e5e4786e",
# "bba3381f-b5eb-4439-bd9e-80c22218d5a7",
# "fba2c100-79e8-42df-ae74-b592418d54f4",
# "efcf0d81-0835-4880-b2fd-d866e8bc2294",
# "8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
# "aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
# "386dbd0e-0241-4a0a-b6a2-6704fba26b1c",
# "9195653c-f4aa-453d-aa95-787f6ccfaae9",
# "d06f0d4d-2cd5-4ede-8de9-598629438c6e",
# "a5bbbcd5-b398-4c91-83d4-55e1e31bbb81",
"8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
"8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
"8f080098-ddb1-424c-b438-4e96e5e4786e",
"bba3381f-b5eb-4439-bd9e-80c22218d5a7",
"fba2c100-79e8-42df-ae74-b592418d54f4",
"efcf0d81-0835-4880-b2fd-d866e8bc2294",
"8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
"aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
"386dbd0e-0241-4a0a-b6a2-6704fba26b1c",
"9195653c-f4aa-453d-aa95-787f6ccfaae9",
"d06f0d4d-2cd5-4ede-8de9-598629438c6e",
"a5bbbcd5-b398-4c91-83d4-55e1e31bbb81",
"f3977615-2b45-4ac5-8bba-80c17dbe2a37",
"215dfd39-f493-4bc3-a027-8a97d72c61bf"
]
chrome_list = [
# "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
"06fe7178-4491-4589-810f-2e2bc9502122",
"e1e75309-3ddb-4d09-92ec-de869c928143",
@@ -215,5 +238,116 @@ if __name__ == '__main__':
"4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
]
for example_id in calc_list:
main("libreoffice_calc", example_id)
# for example_id in calc_list:
# main("libreoffice_calc", example_id)
impress_list = [
# "5d901039-a89c-4bfb-967b-bf66f4df075e",
# "550ce7e7-747b-495f-b122-acdc4d0b8e54",
# "455d3c66-7dc6-4537-a39a-36d3e9119df7",
# "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
# "c59742c0-4323-4b9d-8a02-723c251deaa0",
# "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
# "9ec204e4-f0a3-42f8-8458-b772a6797cab",
# "0f84bef9-9790-432e-92b7-eece357603fb",
# "ce88f674-ab7a-43da-9201-468d38539e4a",
# "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
# "a097acff-6266-4291-9fbd-137af7ecd439",
# "bf4e9888-f10f-47af-8dba-76413038b73c",
"21760ecb-8f62-40d2-8d85-0cee5725cb72"
]
# for example_id in impress_list:
# main("libreoffice_impress", example_id)
thunderbird_list = [
# "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
# "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
"12086550-11c0-466b-b367-1d9e75b3910e",
"06fe7178-4491-4589-810f-2e2bc9502122",
"6766f2b8-8a72-417f-a9e5-56fcaa735837",
"e1e75309-3ddb-4d09-92ec-de869c928143",
"3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5",
"35253b65-1c19-4304-8aa4-6884b8218fc0",
"d088f539-cab4-4f9a-ac92-9999fc3a656e",
"2ad9387a-65d8-4e33-ad5b-7580065a27ca",
"480bcfea-d68f-4aaa-a0a9-2589ef319381",
"030eeff7-b492-4218-b312-701ec99ee0cc",
"94760984-3ff5-41ee-8347-cf1af709fea0",
"99146c54-4f37-4ab8-9327-5f3291665e1e",
"c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
]
# for example_id in thunderbird_list:
# main("thunderbird", example_id)
gimp_list = [
"7a4deb26-d57d-4ea9-9a73-630f66a7b568",
"554785e9-4523-4e7a-b8e1-8016f565f56a",
"77b8ab4d-994f-43ac-8930-8ca087d7c4b4",
"f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce",
"d52d6308-ec58-42b7-a2c9-de80e4837b2b",
"2a729ded-3296-423d-aec4-7dd55ed5fbb3",
"b148e375-fe0b-4bec-90e7-38632b0d73c2",
"a746add2-cab0-4740-ac36-c3769d9bfb46",
"7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
"d16c99dc-2a1e-46f2-b350-d97c86c85c15",
"06ca5602-62ca-47f6-ad4f-da151cde54cc",
"e2dd0213-26db-4349-abe5-d5667bfd725c",
"f723c744-e62c-4ae6-98d1-750d3cd7d79d",
"72f83cdc-bf76-4531-9a1b-eb893a13f8aa",
"7767eef2-56a3-4cea-8c9f-48c070c7d65b",
"734d6579-c07d-47a8-9ae2-13339795476b"
]
# for example_id in gimp_list:
# try:
# main("gimp", example_id)
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
vs_code_list = [
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
"eabc805a-bfcf-4460-b250-ac92135819f6",
"982d12a5-beab-424f-8d38-d2a48429e511",
"4e60007a-f5be-4bfc-9723-c39affa0a6d3",
"e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
"9439a27b-18ae-42d8-9778-5f68f891805e",
"ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
"930fdb3b-11a8-46fe-9bac-577332e2640e",
"276cc624-87ea-4f08-ab93-f770e3790175",
"9d425400-e9b2-4424-9a4b-d4c7abac4140"
]
# for example_id in vs_code_list:
# try:
# main("vs_code", example_id)
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
multiple_list = [
"f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
"b52b40a5-ad70-4c53-b5b0-5650a8387052",
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
"2b9493d7-49b8-493a-a71b-56cd1f4d6908",
"51f5801c-18b3-4f25-b0c3-02f85507a078",
"2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
"510f64c8-9bcc-4be1-8d30-638705850618",
"937087b6-f668-4ba6-9110-60682ee33441",
"ee9a3c83-f437-4879-8918-be5efbb9fac7",
"3680a5ee-6870-426a-a997-eba929a0d25c",
"e135df7c-7687-4ac0-a5f0-76b74438b53e",
"58565672-7bfe-48ab-b828-db349231de6b",
"2fe4b718-3bd7-46ec-bdce-b184f5653624"
]
for example_id in multiple_list:
try:
main("multi_apps", example_id)
except Exception as e:
logger.error("An error occurred while running the example: %s", e)
continue

View File

@@ -134,7 +134,7 @@ def main(example_class, example_id):
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f)
example["snapshot"] = "exp_v1"
example["snapshot"] = "exp_v5"
api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space,
@@ -168,17 +168,17 @@ if __name__ == '__main__':
"af630914-714e-4a24-a7bb-f9af687d3b91"
]
calc_list = [
"eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
"0bf05a7d-b28b-44d2-955a-50b41e24012a",
"7a4e4bc8-922c-4c84-865c-25ba34136be1",
"2bd59342-0664-4ccb-ba87-79379096cc08",
"ecb0df7a-4e8d-4a03-b162-053391d3afaf",
"7efeb4b1-3d19-4762-b163-63328d66303b",
"4e6fcf72-daf3-439f-a232-c434ce416af6",
"6054afcb-5bab-4702-90a0-b259b5d3217c",
"abed40dc-063f-4598-8ba5-9fe749c0615d",
"01b269ae-2111-4a07-81fd-3fcd711993b0",
"8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
# "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
# "0bf05a7d-b28b-44d2-955a-50b41e24012a",
# "7a4e4bc8-922c-4c84-865c-25ba34136be1",
# "2bd59342-0664-4ccb-ba87-79379096cc08",
# "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
# "7efeb4b1-3d19-4762-b163-63328d66303b",
# "4e6fcf72-daf3-439f-a232-c434ce416af6",
# "6054afcb-5bab-4702-90a0-b259b5d3217c",
# "abed40dc-063f-4598-8ba5-9fe749c0615d",
# "01b269ae-2111-4a07-81fd-3fcd711993b0",
# "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
"0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
"4188d3a4-077d-46b7-9c86-23e1a036f6c1",
"51b11269-2ca8-4b2a-9163-f21758420e78",
@@ -197,5 +197,97 @@ if __name__ == '__main__':
"4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
]
for example_id in calc_list:
main("libreoffice_calc", example_id)
# for example_id in calc_list:
# main("libreoffice_calc", example_id)
impress_list = [
# "5d901039-a89c-4bfb-967b-bf66f4df075e",
# "550ce7e7-747b-495f-b122-acdc4d0b8e54",
# "455d3c66-7dc6-4537-a39a-36d3e9119df7",
# "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
# "c59742c0-4323-4b9d-8a02-723c251deaa0",
# "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
# "9ec204e4-f0a3-42f8-8458-b772a6797cab",
# "0f84bef9-9790-432e-92b7-eece357603fb",
# "ce88f674-ab7a-43da-9201-468d38539e4a",
# "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
# "a097acff-6266-4291-9fbd-137af7ecd439",
# "bf4e9888-f10f-47af-8dba-76413038b73c",
"21760ecb-8f62-40d2-8d85-0cee5725cb72"
]
# for example_id in impress_list:
# main("libreoffice_impress", example_id)
# gimp_list = [
# "7a4deb26-d57d-4ea9-9a73-630f66a7b568",
# "554785e9-4523-4e7a-b8e1-8016f565f56a",
# "77b8ab4d-994f-43ac-8930-8ca087d7c4b4",
# "f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce",
# "d52d6308-ec58-42b7-a2c9-de80e4837b2b",
# "2a729ded-3296-423d-aec4-7dd55ed5fbb3",
# "b148e375-fe0b-4bec-90e7-38632b0d73c2",
# "a746add2-cab0-4740-ac36-c3769d9bfb46",
# "7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
# "d16c99dc-2a1e-46f2-b350-d97c86c85c15",
# "06ca5602-62ca-47f6-ad4f-da151cde54cc",
# "e2dd0213-26db-4349-abe5-d5667bfd725c",
# "f723c744-e62c-4ae6-98d1-750d3cd7d79d",
# "72f83cdc-bf76-4531-9a1b-eb893a13f8aa",
# "7767eef2-56a3-4cea-8c9f-48c070c7d65b",
# "734d6579-c07d-47a8-9ae2-13339795476b"
# ]
#
# for example_id in gimp_list:
# try:
# main("gimp", example_id)
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
#
vs_code_list = [
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
"eabc805a-bfcf-4460-b250-ac92135819f6",
"982d12a5-beab-424f-8d38-d2a48429e511",
"4e60007a-f5be-4bfc-9723-c39affa0a6d3",
"e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
"9439a27b-18ae-42d8-9778-5f68f891805e",
"ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
"930fdb3b-11a8-46fe-9bac-577332e2640e",
"276cc624-87ea-4f08-ab93-f770e3790175",
"9d425400-e9b2-4424-9a4b-d4c7abac4140"
]
# for example_id in vs_code_list:
# try:
# main("vs_code", example_id)
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
multiple_list = [
"f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
"b52b40a5-ad70-4c53-b5b0-5650a8387052",
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
"2b9493d7-49b8-493a-a71b-56cd1f4d6908",
"51f5801c-18b3-4f25-b0c3-02f85507a078",
"2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
"510f64c8-9bcc-4be1-8d30-638705850618",
"937087b6-f668-4ba6-9110-60682ee33441",
"ee9a3c83-f437-4879-8918-be5efbb9fac7",
"3680a5ee-6870-426a-a997-eba929a0d25c",
"e135df7c-7687-4ac0-a5f0-76b74438b53e",
"58565672-7bfe-48ab-b828-db349231de6b",
"2fe4b718-3bd7-46ec-bdce-b184f5653624"
]
for example_id in multiple_list:
try:
main("multi_apps", example_id)
except Exception as e:
logger.error("An error occurred while running the example: %s", e)
continue

View File

@@ -1,9 +1,9 @@
import ctypes
import datetime
import json
import logging
import os
import sys
import func_timeout
from desktop_env.envs.desktop_env import DesktopEnv
@@ -124,12 +124,11 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
logger.info("Environment closed.")
def main(example_class, example_id):
def main(example_class, example_id, gpt4_model="gpt-4-vision-preview"):
action_space = "pyautogui"
# example_class = "libreoffice_calc"
# example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
# example_id = "01b269ae-2111-4a07-81fd-3fcd711993b0"
gpt4_model = "gpt-4-vision-preview"
gemini_model = "gemini-pro-vision"
logger.info("Running example %s/%s", example_class, example_id)
@@ -138,7 +137,7 @@ def main(example_class, example_id):
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f)
example["snapshot"] = "exp_v1"
example["snapshot"] = "exp_v5"
# example["snapshot"] = "exp_setup4"
# example["snapshot"] = "Snapshot 30"
@@ -160,7 +159,133 @@ def main(example_class, example_id):
if __name__ == '__main__':
xx_list = [
os_list = [
"94d95f96-9699-4208-98ba-3c3119edf9c2",
"bedcedc4-4d72-425e-ad62-21960b11fe0d",
"43c2d64c-bab5-4dcb-a30c-b888321c319a",
"7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
"ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3",
"f9be0997-4b7c-45c5-b05c-4612b44a6118",
"28cc3b7e-b194-4bc9-8353-d04c0f4d56d2",
"5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
"e0df059f-28a6-4169-924f-b9623e7184cc",
"ddc75b62-7311-4af8-bfb3-859558542b36",
"b6781586-6346-41cd-935a-a6b1487918fc",
"3ce045a0-877b-42aa-8d2c-b4a863336ab8",
"a4d98375-215b-4a4d-aee9-3d4370fccc41",
"13584542-872b-42d8-b299-866967b5c3ef",
"23393935-50c7-4a86-aeea-2b78fd089c5c"
]
for example_id in xx_list:
main("xx", example_id)
# for example_id in os_list:
# try:
# main("os", example_id)
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
calc_list = [
# "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
# "0bf05a7d-b28b-44d2-955a-50b41e24012a",
# "7a4e4bc8-922c-4c84-865c-25ba34136be1",
# "2bd59342-0664-4ccb-ba87-79379096cc08",
# "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
# "7efeb4b1-3d19-4762-b163-63328d66303b",
# "4e6fcf72-daf3-439f-a232-c434ce416af6",
# "6054afcb-5bab-4702-90a0-b259b5d3217c",
# "abed40dc-063f-4598-8ba5-9fe749c0615d",
# "01b269ae-2111-4a07-81fd-3fcd711993b0",
# "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
# "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
# "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
# "51b11269-2ca8-4b2a-9163-f21758420e78",
# "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
# "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
# "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
# "3aaa4e37-dc91-482e-99af-132a612d40f3",
# "37608790-6147-45d0-9f20-1137bb35703d",
# "f9584479-3d0d-4c79-affa-9ad7afdd8850",
"d681960f-7bc3-4286-9913-a8812ba3261a",
"21df9241-f8d7-4509-b7f1-37e501a823f7",
"1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
"357ef137-7eeb-4c80-a3bb-0951f26a8aff",
"aa3a8974-2e85-438b-b29e-a64df44deb4b",
"a01fbce3-2793-461f-ab86-43680ccbae25",
"4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
]
# for example_id in calc_list:
# try:
# main("libreoffice_calc", example_id)
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
impress_list = [
"5d901039-a89c-4bfb-967b-bf66f4df075e",
"550ce7e7-747b-495f-b122-acdc4d0b8e54",
"455d3c66-7dc6-4537-a39a-36d3e9119df7",
"af23762e-2bfd-4a1d-aada-20fa8de9ce07",
"c59742c0-4323-4b9d-8a02-723c251deaa0",
"ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
"9ec204e4-f0a3-42f8-8458-b772a6797cab",
"0f84bef9-9790-432e-92b7-eece357603fb",
"ce88f674-ab7a-43da-9201-468d38539e4a",
"3b27600c-3668-4abd-8f84-7bcdebbccbdb",
"a097acff-6266-4291-9fbd-137af7ecd439",
"bf4e9888-f10f-47af-8dba-76413038b73c",
"21760ecb-8f62-40d2-8d85-0cee5725cb72"
]
# for example_id in impress_list:
# try:
# main("libreoffice_impress", example_id)
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
vs_code_list = [
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
"eabc805a-bfcf-4460-b250-ac92135819f6",
"982d12a5-beab-424f-8d38-d2a48429e511",
"4e60007a-f5be-4bfc-9723-c39affa0a6d3",
"e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
"9439a27b-18ae-42d8-9778-5f68f891805e",
"ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
"930fdb3b-11a8-46fe-9bac-577332e2640e",
"276cc624-87ea-4f08-ab93-f770e3790175",
"9d425400-e9b2-4424-9a4b-d4c7abac4140"
]
# for example_id in vs_code_list:
# try:
# main("vs_code", example_id)
# except Exception as e:
# logger.error("An error occurred while running the example: %s", e)
# continue
multiple_list = [
"f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
"b52b40a5-ad70-4c53-b5b0-5650a8387052",
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
"2b9493d7-49b8-493a-a71b-56cd1f4d6908",
"51f5801c-18b3-4f25-b0c3-02f85507a078",
"2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
"510f64c8-9bcc-4be1-8d30-638705850618",
"937087b6-f668-4ba6-9110-60682ee33441",
"ee9a3c83-f437-4879-8918-be5efbb9fac7",
"3680a5ee-6870-426a-a997-eba929a0d25c",
"e135df7c-7687-4ac0-a5f0-76b74438b53e",
"58565672-7bfe-48ab-b828-db349231de6b",
"2fe4b718-3bd7-46ec-bdce-b184f5653624"
]
for example_id in multiple_list:
try:
main("multi_apps", example_id)
except Exception as e:
logger.error("An error occurred while running the example: %s", e)
continue

View File

@@ -129,7 +129,7 @@ def main(example_class, example_id):
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f)
example["snapshot"] = "exp_v1"
example["snapshot"] = "exp_v5"
api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],

View File

@@ -39,3 +39,4 @@ fastdtw
odfpy
openai
func-timeout
beautifulsoup4