Merge branch 'main' into zdy
This commit is contained in:
@@ -367,7 +367,7 @@ class SetupController:
|
||||
context = browser.contexts[0]
|
||||
|
||||
page = context.new_page() # Create a new page (tab) within the existing context
|
||||
page.goto(url)
|
||||
page.goto(url, timeout=60000)
|
||||
logger.info(f"Opened tab {i + 1}: {url}")
|
||||
|
||||
if i == 0:
|
||||
|
||||
@@ -84,7 +84,6 @@ class DesktopEnv(gym.Env):
|
||||
|
||||
# Initialize emulator and controller
|
||||
logger.info("Initializing...")
|
||||
self._config_screen_size()
|
||||
self._start_emulator()
|
||||
self.vm_ip = self._get_vm_ip()
|
||||
self.controller = PythonController(vm_ip=self.vm_ip)
|
||||
@@ -105,60 +104,6 @@ class DesktopEnv(gym.Env):
|
||||
self._step_no: int = 0
|
||||
self.action_history: List[Dict[str, any]] = []
|
||||
|
||||
def _config_screen_size(self):
    """
    Rewrite the display settings stored in the vmx file at ``self.path_to_vm``.

    fixme: Experimental feature, may cause unexpected errors if the system
    is corrupted.

    Every line mentioning ``svga.autodetect``, ``svga.vramSize``,
    ``displayWidth`` or ``displayHeight`` is dropped, then fresh values
    derived from ``self.vm_screen_size`` are appended and the file is
    written back in place.

    Returns:
        bool: True when the file was rewritten, False when the file does
        not exist or reading/writing it raised an exception.
    """
    def calculate_vram_size(width, height, bits_per_pixel=32):
        """Return the VRAM size in bytes for the given resolution and color depth."""
        return width * height * (bits_per_pixel // 8)

    if not os.path.isfile(self.path_to_vm):
        logger.warning(f"The specified vmx file does not exist: {self.path_to_vm}")
        return False

    width, height = self.vm_screen_size
    vramSize = calculate_vram_size(width, height)

    # Substrings identifying the settings that are about to be replaced.
    replaced_keys = ("svga.autodetect", "svga.vramSize", "displayWidth", "displayHeight")

    try:
        with open(self.path_to_vm, 'r') as vmx:
            current = vmx.readlines()

        # Keep only the lines that do not mention any replaced setting.
        updated = [entry for entry in current
                   if not any(key in entry for key in replaced_keys)]

        # Append new settings for screen size and VRAM.
        updated.extend([
            'svga.autodetect = "TRUE"\n',
            f'svga.vramSize = "{vramSize}"\n',
            f'displayWidth = "{width}"\n',
            f'displayHeight = "{height}"\n',
        ])

        with open(self.path_to_vm, 'w') as vmx:
            vmx.writelines(updated)
        logger.info(f"Screen size for {self.path_to_vm} set to {width}x{height} with VRAM size {vramSize} bytes")
        return True
    except IOError as e:
        logger.error(f"An IOError occurred: {e}")
        return False
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return False
|
||||
|
||||
def _start_emulator(self):
|
||||
while True:
|
||||
try:
|
||||
@@ -280,7 +225,6 @@ class DesktopEnv(gym.Env):
|
||||
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
|
||||
time.sleep(5)
|
||||
|
||||
self._config_screen_size()
|
||||
print(self.vm_screen_size)
|
||||
logger.info("Starting emulator...")
|
||||
self._start_emulator()
|
||||
@@ -350,6 +294,7 @@ class DesktopEnv(gym.Env):
|
||||
self.setup_controller.setup(self.evaluator.get("postconfig", []))
|
||||
|
||||
if type(self.metric) == list:
|
||||
results = []
|
||||
for idx, metric in enumerate(self.metric):
|
||||
try:
|
||||
config = self.evaluator["result"][idx]
|
||||
@@ -366,11 +311,12 @@ class DesktopEnv(gym.Env):
|
||||
**self.metric_options[idx]) if expected_state is not None \
|
||||
else metric(result_state, **self.metric_options[idx])
|
||||
|
||||
if self.metric_conj == 'and' and not bool(metric):
|
||||
if self.metric_conj == 'and' and float(metric) == 0.0:
|
||||
return 0
|
||||
elif self.metric_conj == 'or' and bool(metric):
|
||||
elif self.metric_conj == 'or' and float(metric) == 1.0:
|
||||
return 1
|
||||
return 1 if self.metric_conj == 'and' else 0
|
||||
else: results.append(metric)
|
||||
return sum(results) / len(results) if self.metric_conj == 'and' else max(results)
|
||||
else:
|
||||
try:
|
||||
result_state = self.result_getter(self, self.evaluator["result"])
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
# Setup Instructions
|
||||
|
||||
## Overall
|
||||
Disable the system crash report by:
|
||||
```
|
||||
sudo vim /etc/default/apport
|
||||
```
|
||||
and then change the `enabled` to `0`.
|
||||
|
||||
## LibreOffice
|
||||
For LibreOffice, open the app first, and then turn off the pop-up dialog that appears when saving with 'Ctrl + S'.
|
||||
|
||||
@@ -209,3 +216,5 @@ pip install opencv-python-headless Pillow imagehash
|
||||
- Ensure VLC is running and the correct port (default is 8080) is being used.
|
||||
- If the port is in use by another application, you may change the port number in VLC's settings.
|
||||
|
||||
## GIMP
|
||||
Click the "Keep" button in the pop-up that appears when loading an image.
|
||||
@@ -490,16 +490,16 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
|
||||
parent_id = file['id']
|
||||
|
||||
file.GetContentFile(_path, mimetype=file['mimeType'])
|
||||
except:
|
||||
logger.info('[ERROR]: Failed to download the file from Google Drive')
|
||||
except Exception as e:
|
||||
logger.info('[ERROR]: Failed to download the file from Google Drive', e)
|
||||
return None
|
||||
return _path
|
||||
|
||||
if 'query' in config:
|
||||
return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
|
||||
elif 'path' in config:
|
||||
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1
|
||||
else f'title = {fp} and trashed = false' for idx, fp in enumerate(config['path'])]
|
||||
query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1
|
||||
else f"title = '{fp}' and trashed = false" for idx, fp in enumerate(config['path'])]
|
||||
return get_single_file(query, os.path.join(env.cache_dir, config['dest']))
|
||||
elif 'query_list' in config:
|
||||
_path_list = []
|
||||
@@ -512,8 +512,8 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
|
||||
_path_list = []
|
||||
assert len(config['path_list']) == len(config['dest'])
|
||||
for idx, path in enumerate(config['path_list']):
|
||||
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1
|
||||
else f'title = {fp} and trashed = false' for jdx, fp in enumerate(path)]
|
||||
query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1
|
||||
else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)]
|
||||
dest = config['dest'][idx]
|
||||
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
|
||||
return _path_list
|
||||
@@ -27,6 +27,11 @@ def get_gimp_config_file(env, config: Dict[str, str]):
|
||||
|
||||
_path = os.path.join(env.cache_dir, config["dest"])
|
||||
content = env.controller.get_file(config_path)
|
||||
|
||||
if not content:
|
||||
logger.error("Failed to get GIMP config file.")
|
||||
return None
|
||||
|
||||
with open(_path, "wb") as f:
|
||||
f.write(content)
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@ from .chrome import (
|
||||
is_expected_tabs,
|
||||
is_expected_bookmarks,
|
||||
compare_pdfs,
|
||||
compare_htmls,
|
||||
compare_archive,
|
||||
is_cookie_deleted,
|
||||
is_shortcut_on_desktop,
|
||||
check_font_size,
|
||||
@@ -92,7 +94,8 @@ from .table import (
|
||||
)
|
||||
from .thunderbird import (
|
||||
check_thunderbird_prefs,
|
||||
check_thunderbird_filter
|
||||
check_thunderbird_filter,
|
||||
check_thunderbird_folder
|
||||
)
|
||||
from .vlc import (
|
||||
is_vlc_playing,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import logging, re
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import logging, re, os, shutil
|
||||
from typing import Any, Dict, List, Union
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
import fitz # PyMuPDF
|
||||
import rapidfuzz.fuzz as fuzz
|
||||
|
||||
@@ -14,7 +14,6 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f
|
||||
Checks if the expected tabs are open in Chrome.
|
||||
"""
|
||||
|
||||
print(open_tabs, rule)
|
||||
match_type = rule['type']
|
||||
|
||||
if match_type == "url":
|
||||
@@ -53,10 +52,12 @@ def is_expected_search_query(active_tab_info: Dict[str, str], rules: Dict[str, A
|
||||
return 0.
|
||||
|
||||
|
||||
def compare_pdfs(pdf1_path, pdf2_path):
|
||||
def compare_pdfs(pdf1_path: Union[str, List[str]], pdf2_path: Union[str, List[str]]):
|
||||
"""
|
||||
Compare two PDF files.
|
||||
"""
|
||||
if type(pdf2_path) != list:
|
||||
pdf1_path, pdf2_path = [pdf1_path], [pdf2_path]
|
||||
|
||||
def extract_text_from_pdf(pdf_path):
|
||||
"""Extract text from each page of the PDF."""
|
||||
@@ -65,14 +66,100 @@ def compare_pdfs(pdf1_path, pdf2_path):
|
||||
for page in pdf:
|
||||
text += page.get_text()
|
||||
return text.strip()
|
||||
try:
|
||||
text1 = extract_text_from_pdf(pdf1_path)
|
||||
text2 = extract_text_from_pdf(pdf2_path)
|
||||
|
||||
return fuzz.ratio(text1, text2) / 100
|
||||
except Exception as e:
|
||||
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
|
||||
return 0.0
|
||||
score = 0.
|
||||
for path1, path2 in zip(pdf1_path, pdf2_path):
|
||||
try:
|
||||
text1 = extract_text_from_pdf(path1)
|
||||
text2 = extract_text_from_pdf(path2)
|
||||
score += fuzz.ratio(text1, text2) / 100
|
||||
except Exception as e:
|
||||
logger.info(f"[ERROR]: unexpected error occurred when comparing PDF files: {e}")
|
||||
return score / len(pdf2_path)
|
||||
|
||||
|
||||
def compare_archive(pred_path: str, gold_path: str, **kwargs) -> float:
    """
    Compare two archives. Note that the files in the archives should be of the same type.

    The predicted archive is unpacked into ``<pred_path without extension>_pred``
    (any previous content is removed first). The gold archive is unpacked into
    ``<gold_path without extension>_gold`` only when that folder does not exist
    yet, so an earlier extraction is reused as a cache.

    Args:
        pred_path (str): path of the predicted archive; a falsy value scores 0.
        gold_path (str): path of the reference archive.
        **kwargs:
            file_type (str): selects the per-file comparison function, one of
                text, pdf, docx, ppt, image, csv, table, audio, video.
                default: text
            Remaining options are forwarded to the comparison function.

    Returns:
        float: 0 when the top-level file listings differ, 1 when both archives
        are empty, otherwise the mean of the per-file comparison scores.

    Raises:
        ValueError: if ``file_type`` is not supported.
    """
    if not pred_path:
        return 0.
    file_type = kwargs.pop('file_type', 'text')
    pred_folder = os.path.splitext(pred_path)[0] + '_pred'
    gold_folder = os.path.splitext(gold_path)[0] + '_gold'

    if os.path.exists(pred_folder):  # remove existing folder for new predictions
        shutil.rmtree(pred_folder, ignore_errors=True)
    os.makedirs(pred_folder)
    shutil.unpack_archive(pred_path, pred_folder)
    if not os.path.exists(gold_folder):  # use cache if exists
        os.makedirs(gold_folder)
        try:
            shutil.unpack_archive(gold_path, gold_folder)
        except Exception:
            # Do not leave a half-extracted folder behind: the next call would
            # treat it as a valid cache and compare against incomplete data.
            shutil.rmtree(gold_folder, ignore_errors=True)
            raise

    pred_files = sorted(os.listdir(pred_folder))
    gold_files = sorted(os.listdir(gold_folder))
    if pred_files != gold_files:
        return 0.
    if not pred_files:
        # Both archives are empty, hence identical; returning here also avoids
        # dividing by zero when averaging below.
        return 1.

    def get_compare_function():
        # The sibling metric modules are imported lazily, only for the
        # requested file type.
        if file_type == 'text':
            from .vscode import compare_text_file
            return compare_text_file
        elif file_type == 'pdf':
            return compare_pdfs
        elif file_type == 'docx':
            from .docs import compare_docx_files
            return compare_docx_files
        elif file_type == 'ppt':
            from .slides import compare_pptx_files
            return compare_pptx_files
        elif file_type == 'image':
            from .vlc import compare_images
            return compare_images
        elif file_type == 'csv':
            from .table import compare_csv
            return compare_csv
        elif file_type == 'table':
            from .table import compare_table
            return compare_table
        elif file_type == 'audio':
            from .vlc import compare_audios
            return compare_audios
        elif file_type == 'video':
            from .vlc import compare_videos
            return compare_videos
        else:
            raise ValueError('[ERROR]: not support file type: %s' % file_type)

    score = 0
    compare_function = get_compare_function()
    for f1, f2 in zip(pred_files, gold_files):
        fp1 = os.path.join(pred_folder, f1)
        fp2 = os.path.join(gold_folder, f2)
        score += compare_function(fp1, fp2, **kwargs)
    return score / len(pred_files)
|
||||
|
||||
|
||||
def compare_htmls(html_path1: str, html_path2: str) -> float:
    """
    Compare two HTML files.

    Both files are parsed with the ``lxml`` parser and their nodes are walked
    in parallel. Two tags match when their name, stripped text and attributes
    are all equal; any other pair of nodes must compare equal with ``==``.

    Args:
        html_path1 (str): path of the first HTML file (read as UTF-8).
        html_path2 (str): path of the second HTML file (read as UTF-8).

    Returns:
        float: 1. if every pair of nodes matches and both documents have the
        same number of nodes, otherwise 0.
    """
    from itertools import zip_longest

    with open(html_path1, 'r', encoding='utf-8') as inf:
        soup1 = BeautifulSoup(inf, 'lxml')
    with open(html_path2, 'r', encoding='utf-8') as inf:
        soup2 = BeautifulSoup(inf, 'lxml')

    def compare_elements(elem1, elem2):
        if not (isinstance(elem1, Tag) and isinstance(elem2, Tag)):
            return elem1 == elem2
        if elem1.name != elem2.name:
            return False
        if elem1.text.strip() != elem2.text.strip():
            return False
        if elem1.attrs != elem2.attrs:
            return False
        return True

    # zip_longest (rather than zip) so that a document with extra trailing
    # nodes is reported as different instead of being silently truncated.
    for elem1, elem2 in zip_longest(soup1.descendants, soup2.descendants):
        if elem1 is None or elem2 is None:
            return .0
        if not compare_elements(elem1, elem2):
            return .0
    return 1.
|
||||
|
||||
|
||||
def is_cookie_deleted(cookie_data, rule):
|
||||
|
||||
@@ -12,6 +12,8 @@ def compare_image_list(pred_img_path_list: Union[str, List[str]],
|
||||
pred_img_path_list = [pred_img_path_list]
|
||||
gold_img_path_list = [gold_img_path_list]
|
||||
for pred_img_path, gold_img_path in zip(pred_img_path_list, gold_img_path_list):
|
||||
if not pred_img_path or not gold_img_path:
|
||||
return 0.0
|
||||
pred_img = Image.open(pred_img_path)
|
||||
gold_img = Image.open(gold_img_path)
|
||||
diff = ImageChops.difference(pred_img, gold_img)
|
||||
|
||||
@@ -190,8 +190,7 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
||||
if run1.font.underline != run2.font.underline and examine_font_underline:
|
||||
return 0
|
||||
|
||||
if ('strike' in run1.font._element.attrib) != (
|
||||
'strike' in run2.font._element.attrib) and examine_strike_through:
|
||||
if run1.font._element.attrib.get('strike', 'noStrike') != run2.font._element.attrib.get('strike', 'noStrike') and examine_strike_through:
|
||||
return 0
|
||||
|
||||
# fixme: Actually there are more properties to be compared, but we cannot get them through pptx
|
||||
@@ -250,6 +249,14 @@ def evaluate_presentation_fill_to_rgb_distance(pptx_file, rules):
|
||||
r1, g1, b1 = fill.fore_color.rgb
|
||||
r2, g2, b2 = _rgb
|
||||
return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 ** 2 + 255 ** 2 + 255 ** 2)
|
||||
elif fill.type == 5:
|
||||
master_fill = _slide.slide_layout.slide_master.background.fill
|
||||
if master_fill.type == 1:
|
||||
r1, g1, b1 = master_fill.fore_color.rgb
|
||||
else:
|
||||
return 1
|
||||
r2, g2, b2 = _rgb
|
||||
return sqrt((r1 - r2) ** 2 + (g1 - g2) ** 2 + (b1 - b2) ** 2) / sqrt(255 ** 2 + 255 ** 2 + 255 ** 2)
|
||||
|
||||
return 1
|
||||
|
||||
@@ -363,3 +370,7 @@ def check_page_number_colors(pptx_file, rules):
|
||||
return 0
|
||||
|
||||
return 1
|
||||
|
||||
if __name__ == '__main__':
|
||||
print(compare_pptx_files(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx", r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx"))
|
||||
# print(evaluate_presentation_fill_to_rgb_distance(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\3b27600c-3668-4abd-8f84-7bcdebbccbdb\lec17-gui-events.pptx", {"rgb": (0, 0, 255)}))
|
||||
@@ -128,6 +128,47 @@ def check_thunderbird_filter(result: str, rules: Dict[str, List[Dict[str, str]]]
|
||||
unexpect_metric = unexpect_metric and not any(_match_record(r, flt) for r in rules.get("unexpect", []))
|
||||
return float(all(expect_metrics) and unexpect_metric)
|
||||
|
||||
|
||||
def check_thunderbird_folder(result: Union[str, List[str]], reference: Union[str, List[str]], **kwargs) -> float:
    """
    Check the file or file_list that each text file contains all messages in a folder in Thunderbird. Each message is started with `FROM - `.

    Each file is split into messages, the messages are normalized, optionally
    filtered and de-duplicated, then sorted, so the comparison does not depend
    on message order.

    Args:
        result: path (or list of paths) of the predicted folder file(s); a
            None entry scores 0.
        reference: path (or list of paths) of the expected folder file(s).
        **kwargs:
            ignore_status (bool): for comparison, ignore the status (X-Mozilla-Status: 0000) of each message. default: False
            ignore_keys (bool): for comparison, ignore the keys (X-Mozilla-Keys: label) of each message. default: False
            remove_deleted (bool): ignore deleted messages which has status code 0008 or 0009. default: True
            remove_duplicate (bool): remove duplicate messages. default: True

    Returns:
        float: 1.0 if every predicted file matches its reference, else 0.0.
    """
    def normalize_msg(msg, options):
        """Strip the headers that the options ask to ignore, then trim whitespace."""
        ignore_status = options.get('ignore_status', False)
        ignore_keys = options.get('ignore_keys', False)
        if ignore_status:
            msg = re.sub(r'X-Mozilla-Status\d?:[\s\d]+', '', msg)
        if ignore_keys:
            msg = re.sub(r'(X-Mozilla-Keys:[^\n]*?)\n(MIME-Version)', r'\2', msg)
        return msg.strip()

    def read_thunderbird_folder_file(path: str) -> str:
        """Return the canonical (filtered, sorted) text of one folder file."""
        with open(path, 'r') as inf:
            data = inf.read().strip()
        messages = []
        # NOTE(review): the split marker is upper-case; confirm that it matches
        # the separator actually present in the files being compared.
        for mail in data.split('FROM - '):
            # Skip the empty chunks produced by the split (e.g. the one before
            # the first marker). The previous check was inverted and discarded
            # every real message, so any two files compared as equal.
            if not mail.strip():
                continue
            if kwargs.get('remove_deleted', True) and re.search(r'X-Mozilla-Status: 000[89]', mail):
                continue
            messages.append('FROM - ' + normalize_msg(mail, kwargs))
        if kwargs.get('remove_duplicate', True):
            messages = set(messages)
        return '\n'.join(sorted(messages))

    if not isinstance(reference, list):
        result, reference = [result], [reference]
    for pred, gold in zip(result, reference):
        if pred is None:
            return .0
        mail1 = read_thunderbird_folder_file(pred)
        mail2 = read_thunderbird_folder_file(gold)
        if mail1 != mail2:
            return .0
    return 1.0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
#import lxml.etree
|
||||
#from lxml.cssselect import CSSSelector
|
||||
|
||||
@@ -49,6 +49,8 @@ def check_json_settings(actual: str, expected: str, **options) -> float:
|
||||
Return:
|
||||
float: the score
|
||||
"""
|
||||
if not actual:
|
||||
return 0.
|
||||
|
||||
with open(actual, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
|
||||
"snapshot": "libreoffice_impress",
|
||||
"instruction": "Please make the background blue on all my slides.",
|
||||
"instruction": "Please make the background blue on all my slides. I was stuck by finding the entrance to do that for a while...",
|
||||
"source": "https://www.libreofficehelp.com/change-slide-background-impress/#All_Slides",
|
||||
"config": [
|
||||
{
|
||||
|
||||
@@ -40,9 +40,8 @@
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://www.freerice.com/",
|
||||
"https://www.hku.hk/",
|
||||
"https://about.meta.com/technologies/facebook-app/"
|
||||
"https://news.google.com",
|
||||
"https://x.com"
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
@@ -44,9 +44,8 @@
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://www.freerice.com/",
|
||||
"https://www.hku.hk/",
|
||||
"https://about.meta.com/technologies/facebook-app/"
|
||||
"https://news.google.com",
|
||||
"https://x.com"
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://www.apple.com/",
|
||||
"https://en.sjtu.edu.cn/",
|
||||
"https://scholar.google.com/"
|
||||
]
|
||||
}
|
||||
@@ -83,7 +82,6 @@
|
||||
"type": "url",
|
||||
"urls": [
|
||||
"https://www.apple.com/",
|
||||
"https://en.sjtu.edu.cn/",
|
||||
"https://scholar.google.com/",
|
||||
"https://www.amazon.com/"
|
||||
]
|
||||
|
||||
@@ -0,0 +1,177 @@
|
||||
{
|
||||
"id": "78aed49a-a710-4321-a793-b611a7c5b56b",
|
||||
"snapshot": "chrome",
|
||||
"instruction": "Could you help me save all attachments of the oldest email in Bills local folders to the attachment/ folder in Google Drive and then move this email to a different folder \"have_seen\" in Local Folders.",
|
||||
"source": "https://marketplace.uipath.com/listings/upload-email-attachments-from-gmail-to-google-drive",
|
||||
"config": [
|
||||
{
|
||||
"type": "googledrive",
|
||||
"parameters": {
|
||||
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
|
||||
"operation": ["delete"],
|
||||
"args": [
|
||||
{
|
||||
"query": "title = 'attachment' and 'root' in parents and trashed = false",
|
||||
"trash": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"google-chrome",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"socat",
|
||||
"tcp-listen:9222,fork",
|
||||
"tcp:localhost:1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://stackoverflow.com/",
|
||||
"https://paperswithcode.com/"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "login",
|
||||
"parameters": {
|
||||
"settings_file": "evaluation_examples/settings/google/settings.json",
|
||||
"platform": "googledrive"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "download",
|
||||
"parameters": {
|
||||
"files": [
|
||||
{
|
||||
"url": "https://drive.usercontent.google.com/download?id=1W18fZ4Dog-cSH3Fxa3bcfTz28z6AsL_1&export=download&authuser=0&confirm=t&uuid=7c8fc164-6e7b-4849-aba1-f3dba947feba&at=APZUnTWd2DFbIYnlMf-GY4qMqXcc:1706682437093",
|
||||
"path": "/home/user/thunderbird-profile.tar.gz"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"tar",
|
||||
"-xz",
|
||||
"--recursive-unlink",
|
||||
"-f",
|
||||
"/home/user/thunderbird-profile.tar.gz",
|
||||
"-C",
|
||||
"/home/user/"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"/usr/bin/thunderbird"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"thunderbird",
|
||||
"chrome"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": [
|
||||
"check_thunderbird_folder",
|
||||
"compare_pdfs"
|
||||
],
|
||||
"result": [
|
||||
{
|
||||
"type": "vm_file",
|
||||
"multi": true,
|
||||
"path": [
|
||||
"/home/user/.thunderbird/t5q2a5hp.default-release/Mail/Local Folders/have_seen",
|
||||
"/home/user/.thunderbird/t5q2a5hp.default-release/Mail/Local Folders/Bills"
|
||||
],
|
||||
"dest": [
|
||||
"have_seen",
|
||||
"Bills"
|
||||
],
|
||||
"gives": [
|
||||
0,
|
||||
1
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "googledrive_file",
|
||||
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
|
||||
"path_list": [
|
||||
[
|
||||
"attachment",
|
||||
"Invoice-01234567-89ab.pdf"
|
||||
],
|
||||
[
|
||||
"attachment",
|
||||
"Receipt-0123-4567.pdf"
|
||||
]
|
||||
],
|
||||
"dest": [
|
||||
"Invoice-01234567-89ab.pdf",
|
||||
"Receipt-0123-4567.pdf"
|
||||
]
|
||||
}
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"multi": true,
|
||||
"path": [
|
||||
"https://drive.usercontent.google.com/download?id=12ielR0p3jFZ4UDfGlC7fdF88uUTM_3bn&export=download&authuser=0&confirm=t&uuid=6ef47d29-2772-49cd-87e7-17efd587f1ba&at=APZUnTUGjXxDcDcMF5RoAsVtbFOB:1706689562530",
|
||||
"https://drive.usercontent.google.com/download?id=13XQ4Hr62WyHVwpefjD-tgkpjb0p9HKpe&export=download&authuser=0&confirm=t&uuid=ad4ee033-83e4-4df4-9f9c-ac61d43634bc&at=APZUnTV5ZI1GFXNHTyIblwSKjyku:1706692277803"
|
||||
],
|
||||
"dest": [
|
||||
"have_seen_gold",
|
||||
"Bills_gold"
|
||||
],
|
||||
"gives": [
|
||||
0,
|
||||
1
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"multi": true,
|
||||
"path": [
|
||||
"https://drive.usercontent.google.com/download?id=1SlzOPRfzaKyhBERy-ks2-rjzrjtvgoDG&export=download&authuser=0&confirm=t&uuid=e4cb717b-fb5d-4860-99d9-9e1f38df2592&at=APZUnTXlcSimcf9qZ7uZmlAUj-zQ:1706683984205",
|
||||
"https://drive.usercontent.google.com/download?id=19SdAYymlHvFQ7wzc-_JfFYOYMW6xAw5-&export=download&authuser=0&confirm=t&uuid=e1cbff64-d615-493d-9d02-bcdbd40f878c&at=APZUnTVRpcei6q_BjzQ7EtHA7voq:1706683990388"
|
||||
],
|
||||
"dest": [
|
||||
"Invoice-01234567-89ab_gold.pdf",
|
||||
"Receipt-0123-4567_gold.pdf"
|
||||
],
|
||||
"gives": [
|
||||
0,
|
||||
1
|
||||
]
|
||||
}
|
||||
],
|
||||
"options": [
|
||||
{
|
||||
"remove_deleted": true,
|
||||
"remove_duplicate": true
|
||||
},
|
||||
{}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -21,7 +21,7 @@
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"chromium-browser",
|
||||
"google-chrome",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
@@ -42,6 +42,7 @@
|
||||
"urls_to_open": [
|
||||
"https://www.zhihu.com/",
|
||||
"https://www.coursera.org/",
|
||||
"https://www.deepl.com",
|
||||
"https://www.wikidata.org/wiki/Wikidata:Main_Page"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -0,0 +1,132 @@
|
||||
{
|
||||
"id": "a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb",
|
||||
"snapshot": "chrome",
|
||||
"instruction": "Please help me backup my emails in \"Bills\" folder in Thunderbird and store the .eml files with only subject names to my Google Drive folder called \"emails\".",
|
||||
"source": "https://marketplace.uipath.com/listings/merge-pdfs-from-gmail-email-attachments-and-upload-to-gogle-drive",
|
||||
"config": [
|
||||
{
|
||||
"type": "googledrive",
|
||||
"parameters": {
|
||||
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
|
||||
"operation": ["delete"],
|
||||
"args": [
|
||||
{
|
||||
"query": "title = 'emails' and 'root' in parents",
|
||||
"trash": false
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"google-chrome",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"socat",
|
||||
"tcp-listen:9222,fork",
|
||||
"tcp:localhost:1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://arxiv.org/",
|
||||
"https://github.com/ohmyzsh/ohmyzsh/wiki/themes",
|
||||
"https://releases.ubuntu.com/"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "login",
|
||||
"parameters": {
|
||||
"settings_file": "evaluation_examples/settings/google/settings.json",
|
||||
"platform": "googledrive"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "download",
|
||||
"parameters": {
|
||||
"files": [
|
||||
{
|
||||
"url": "https://drive.usercontent.google.com/download?id=1cATYRGGbZ_vZEjJWajI1Dn4gmp3Won-l&export=download&authuser=0&confirm=t&uuid=156022ae-a56a-400c-a934-34e4369ec82a&at=APZUnTWMnGMhJciITn7IvRY33zuJ:1706707804986",
|
||||
"path": "/home/user/thunderbird-profile.tar.gz"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"tar",
|
||||
"-xz",
|
||||
"--recursive-unlink",
|
||||
"-f",
|
||||
"/home/user/thunderbird-profile.tar.gz",
|
||||
"-C",
|
||||
"/home/user/"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"/usr/bin/thunderbird"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"thunderbird",
|
||||
"chrome"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "check_thunderbird_folder",
|
||||
"result": {
|
||||
"type": "googledrive_file",
|
||||
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
|
||||
"path_list": [
|
||||
[
|
||||
"emails",
|
||||
"Amazon Web Services Invoice Available [Account: 0123456789ab] [Invoice ID: 0123456789].eml"
|
||||
],
|
||||
[
|
||||
"emails",
|
||||
"Your receipt from X (formerly Twitter) #0123-4567.eml"
|
||||
]
|
||||
],
|
||||
"dest": [
|
||||
"pred1.eml",
|
||||
"pred2.eml"
|
||||
]
|
||||
},
|
||||
"expected": {
|
||||
"type": "cloud_file",
|
||||
"multi": "true",
|
||||
"path": [
|
||||
"https://drive.usercontent.google.com/download?id=1Fb2ofAjfn-wlIYbtIGXNvLJEB85KbO7Y&export=download&authuser=0&confirm=t&uuid=9a996347-9093-43ed-8ad2-7e769e0576bd&at=APZUnTUp2pvXjM0zGvjc22lewOv3:1706708252395",
|
||||
"https://drive.usercontent.google.com/download?id=1LZ2PgwmVgO62dNOueWy7RLlJ1_d8Hz3E&export=download&authuser=0&confirm=t&uuid=f632abc2-3ff7-4e9f-a7c6-72c9bbc44654&at=APZUnTXihve7i15GwLyEx2rwDFUk:1706708265408"
|
||||
],
|
||||
"dest": [
|
||||
"gold1.eml",
|
||||
"gold2.eml"
|
||||
],
|
||||
"gives": [
|
||||
0,
|
||||
1
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -40,9 +40,9 @@
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://www.freerice.com/",
|
||||
"https://www.hku.hk/",
|
||||
"https://about.meta.com/technologies/facebook-app/"
|
||||
"https://news.google.com",
|
||||
"https://x.com",
|
||||
"https://www.deepl.com"
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
{
|
||||
"id": "c867c42d-a52d-4a24-8ae3-f75d256b5618",
|
||||
"snapshot": "thunderbird",
|
||||
"instruction": "Please assist me in exporting my contacts of Personal Address Book from Thunderbird into contacts.csv file in the desktop and convert it to .xlsx with Libreoffice Calc.",
|
||||
"source": "https://www.sync.blue/en/sync/mozilla-thunderbird/google-sheets/",
|
||||
"config": [
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"libreoffice",
|
||||
"--calc"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "download",
|
||||
"parameters": {
|
||||
"files": [
|
||||
{
|
||||
"url": "https://drive.usercontent.google.com/download?id=1wKXmJ14dnxSzdy9ZF_ePWU7zpevY6Dry&export=download&authuser=0&confirm=t&uuid=9b476c95-8eee-4a9a-8cee-c3620d5ce250&at=APZUnTUzDeeeMNr34DB1vEnBK6N7:1706719624132",
|
||||
"path": "/home/user/thunderbird-profile.tar.gz"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"tar",
|
||||
"--recursive-unlink",
|
||||
"-xz",
|
||||
"-f",
|
||||
"/home/user/thunderbird-profile.tar.gz",
|
||||
"-C",
|
||||
"/home/user/"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"/usr/bin/thunderbird"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"thunderbird",
|
||||
"libreoffice_calc"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": [
|
||||
"compare_csv",
|
||||
"compare_table"
|
||||
],
|
||||
"conj": "and",
|
||||
"result": [
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/contacts.csv",
|
||||
"dest": "contacts.csv"
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/contacts.xlsx",
|
||||
"dest": "contacts.xlsx"
|
||||
}
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://drive.usercontent.google.com/download?id=1StwASpAR2ALq2Y1vugGsdUJptg6FwjEm&export=download&authuser=0&confirm=t&uuid=56339e19-b889-4da1-ab72-5e0b90f13fff&at=APZUnTVWFF2pBrtWU_hXgzfbrWP2:1706719668676",
|
||||
"dest": "contacts_gold.csv"
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://drive.usercontent.google.com/download?id=1s25eUpvkMzSm6p_WA7O13t6mVqmkxr2C&export=download&authuser=0&confirm=t&uuid=901cbd32-6026-4391-a5cc-989e1047cf7c&at=APZUnTUs27mZceDshB_f9Tx4PFyz:1706719610831",
|
||||
"dest": "contacts_gold.xlsx"
|
||||
}
|
||||
],
|
||||
"options": [
|
||||
{},
|
||||
{
|
||||
"rules": [
|
||||
{
|
||||
"type": "sheet_data",
|
||||
"sheet_idx0": "RI0",
|
||||
"sheet_idx1": "EI0"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"id": "d9b7c649-c975-4f53-88f5-940b29c47247",
|
||||
"snapshot": "thunderbird",
|
||||
"instruction": "Help me extract the latest 5 emails in daily folder from Thunderbird, from the earliest to the most recent by time, and creates a LibreOffice Calc Report \"report.xlsx\" in the desktop, storing the sender_name, sender_address, subject, CC, and number_of_attachments.",
|
||||
"source": "https://marketplace.uipath.com/listings/extract-the-first-1000-gmail-emails-from-the-current-month-in-a-new-google-sheets-report",
|
||||
"config": [
|
||||
{
|
||||
"type": "download",
|
||||
"parameters": {
|
||||
"files": [
|
||||
{
|
||||
"url": "https://drive.usercontent.google.com/download?id=1aCmZvSIs8Mb2kM_IVyhTbLawFvHOrlW-&export=download&authuser=0&confirm=t&uuid=dee0fe8b-731b-4bb7-97c4-02a2d8154da8&at=APZUnTVO-WjksU7WYUq4sCkNMlsL:1706710969972",
|
||||
"path": "/home/user/thunderbird-profile.tar.gz"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"tar",
|
||||
"-xz",
|
||||
"--recursive-unlink",
|
||||
"-f",
|
||||
"/home/user/thunderbird-profile.tar.gz",
|
||||
"-C",
|
||||
"/home/user/"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"/usr/bin/thunderbird"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"thunderbird",
|
||||
"libreoffice_calc"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "compare_table",
|
||||
"result": {
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/report.xlsx",
|
||||
"dest": "report.xlsx"
|
||||
},
|
||||
"expected": {
|
||||
"type": "cloud_file",
|
||||
"path": "https://drive.usercontent.google.com/download?id=13L73ukCzuLYUgY11xz2b6DEoYhFvmXVE&export=download&authuser=0&confirm=t&uuid=82e92b1f-d4c0-44e6-b40f-595dff880acd&at=APZUnTXzPvN60uTLjy7QSPEF2Ft9:1706714031096",
|
||||
"dest": "report_gold.xlsx"
|
||||
},
|
||||
"options": {
|
||||
"rules": [
|
||||
{
|
||||
"type": "sheet_data",
|
||||
"sheet_idx0": "RI0",
|
||||
"sheet_idx1": "EI0"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
{
|
||||
"id": "e135df7c-7687-4ac0-a5f0-76b74438b53e",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "Please convert a .xlsx file opened in LibreOffice Calc to a .html file and view it in Chrome.",
|
||||
"source": "https://www.ilovefreesoftware.com/23/featured/free-csv-to-html-converter-software-windows.html",
|
||||
"config": [
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"google-chrome",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"socat",
|
||||
"tcp-listen:9222,fork",
|
||||
"tcp:localhost:1337"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "chrome_open_tabs",
|
||||
"parameters": {
|
||||
"urls_to_open": [
|
||||
"https://aclanthology.org/",
|
||||
"https://openai.com/",
|
||||
"https://www.linkedin.com/home/"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "download",
|
||||
"parameters": {
|
||||
"files": [
|
||||
{
|
||||
"url": "https://drive.usercontent.google.com/download?id=1zio_nULUdQGFGFBD55aZ52vhqYBtlQeJ&export=download&authuser=0&confirm=t&uuid=68483de0-2035-461d-90d5-e4048825d1ce&at=APZUnTXRMiB4UDzbsqrgm6BbFefE:1706630941497",
|
||||
"path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.xlsx"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "open",
|
||||
"parameters": {
|
||||
"path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.xlsx"
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"thunderbird",
|
||||
"chrome"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": ["is_expected_tabs", "compare_htmls"],
|
||||
"result": [
|
||||
{
|
||||
"type": "open_tabs_info"
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.html",
|
||||
"dest": "annual-enterprise-survey-2021-financial-year-provisional.html"
|
||||
}
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"type": "url",
|
||||
"urls": [
|
||||
"https://aclanthology.org/",
|
||||
"https://openai.com/",
|
||||
"https://www.linkedin.com/home/",
|
||||
"file:///home/user/Desktop/annual-enterprise-survey-2021-financial-year-provisional.html"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://drive.usercontent.google.com/download?id=1HvdTjLZctQGgo3BojmPBiSrPXFkqubJi&export=download&authuser=0&confirm=t&uuid=47f78e2c-bc38-416b-900f-4837ff588bfa&at=APZUnTXSartmo7MSTsanrkc9zHpm:1706680670213",
|
||||
"dest": "annual-enterprise-survey-2021-financial-year-provisional_gold.html"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,121 @@
|
||||
{
|
||||
"id": "f7dfbef3-7697-431c-883a-db8583a4e4f9",
|
||||
"snapshot": "libreoffice_writer",
|
||||
"instruction": "Could you convert all `.doc` files in current directory to PDF all at once in the command line?",
|
||||
"source": "https://www.thegeekdiary.com/libreoffice-command-examples-in-linux/",
|
||||
"config": [
|
||||
{
|
||||
"type": "download",
|
||||
"parameters": {
|
||||
"files": [
|
||||
{
|
||||
"url": "https://drive.usercontent.google.com/download?id=1efUzatD7vixloPY9-XENOL3OZ5__1hod&export=download&authuser=0&confirm=t&uuid=14a3a6b1-a6e2-46cf-9e5b-bcc9e2acbc98&at=APZUnTWtyQUzT6eO5ZTcnmAZkwyX:1706704733958",
|
||||
"path": "/home/user/Desktop/doc.tar.gz"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"tar -zxf /home/user/Desktop/doc.tar.gz -C /home/user/Desktop/ && rm /home/user/Desktop/doc.tar.gz"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"history -c && echo > ~/.bash_history && sleep 3"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"gnome-terminal",
|
||||
"--maximize",
|
||||
"--working-directory=/home/user/Desktop"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"trajectory": "trajectories/",
|
||||
"related_apps": [
|
||||
"libreoffice_writer",
|
||||
"terminal"
|
||||
],
|
||||
"evaluator": {
|
||||
"postconfig": [
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"cd /home/user/Desktop && tar -zcf pdf.tar.gz *.pdf"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"killall gnome-terminal-server"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"func": [
|
||||
"check_include_exclude",
|
||||
"compare_archive"
|
||||
],
|
||||
"result": [
|
||||
{
|
||||
"type": "vm_command_line",
|
||||
"command": [
|
||||
"/bin/bash",
|
||||
"-c",
|
||||
"output=$(cat ~/.bash_history | grep -E \"(soffice|libreoffice).+--convert-to\\s+pdf.+\\*\\.doc\"); if [ -z \"$output\" ]; then echo \"failed to complete this task\"; else echo \"catch the desired command\"; fi"
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/pdf.tar.gz",
|
||||
"dest": "pdf.tar.gz"
|
||||
}
|
||||
],
|
||||
"expected": [
|
||||
{
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"include": [
|
||||
"catch the desired command"
|
||||
],
|
||||
"exclude": [
|
||||
"failed to complete this task"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://drive.usercontent.google.com/download?id=1A-UoQdQvvtFBQWkPi_Q8ecNe4gjlh0dg&export=download&authuser=0&confirm=t&uuid=0c97e2f6-3de8-438f-a676-557cca5ee292&at=APZUnTUy4pghj5TtS-suVMXrCLO2:1706705033330",
|
||||
"dest": "pdf_gold.tar.gz"
|
||||
}
|
||||
],
|
||||
"options": [
|
||||
{},
|
||||
{
|
||||
"file_type": "pdf"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
|
||||
"snapshot": "os",
|
||||
"instruction": "Can you move the file with the path 'todo.txt' to the directory with the path 'done'?",
|
||||
"instruction": "Can you move the file with the path 'todo.txt' on the Desktop to the directory with the path 'done' on the Desktop?",
|
||||
"source": "https://ubuntu.com/tutorials/command-line-for-beginners#5-moving-and-manipulating-files",
|
||||
"config": [
|
||||
{
|
||||
|
||||
@@ -18,7 +18,10 @@
|
||||
{
|
||||
"type": "launch",
|
||||
"parameters": {
|
||||
"command": ["code", "/home/user/Desktop/vscode_replace_text.txt"]
|
||||
"command": [
|
||||
"code",
|
||||
"/home/user/Desktop/vscode_replace_text.txt"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -33,6 +36,36 @@
|
||||
"vscode"
|
||||
],
|
||||
"evaluator": {
|
||||
"postconfig": [
|
||||
{
|
||||
"type": "activate_window",
|
||||
"parameters": {
|
||||
"window_name": "Visual Studio Code"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "sleep",
|
||||
"parameters": {
|
||||
"seconds": 0.5
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "execute",
|
||||
"parameters": {
|
||||
"command": [
|
||||
"python",
|
||||
"-c",
|
||||
"import pyautogui; pyautogui.hotkey('ctrl', 's');"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "sleep",
|
||||
"parameters": {
|
||||
"seconds": 0.5
|
||||
}
|
||||
}
|
||||
],
|
||||
"func": "compare_text_file",
|
||||
"expected": {
|
||||
"type": "cloud_file",
|
||||
|
||||
@@ -1,13 +1 @@
|
||||
{
|
||||
"installed": {
|
||||
"client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com",
|
||||
"project_id": "xlang-2024-benchmarking",
|
||||
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
||||
"token_uri": "https://oauth2.googleapis.com/token",
|
||||
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
||||
"client_secret": "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",
|
||||
"redirect_uris": [
|
||||
"http://localhost"
|
||||
]
|
||||
}
|
||||
}
|
||||
{"installed":{"client_id":"786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com","project_id":"xlang-2024-benchmarking","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}
|
||||
@@ -1 +1 @@
|
||||
{"access_token": "ya29.a0AfB_byBdrcgbmWKGyEUOxxuJBhxOs1uu0AqWeLgJKKKLG_dVg4iQKJAdiWD2oetHrKn17p4ZtfX-vt1VQ0BiF2MPD2exX1oESsQkXaO8q4TM1olIpadvlUBqUkqKJcjCqn1dp1oaTVYU-Srf2wQCGnDt3ozjljdkHXN_MQaCgYKAa4SARISFQHGX2MiWnixlrP3Se3vEV73_4fenA0173", "client_id": "786888752612-6cv6lermep9n6704s4kv20h08lotias9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0e0qXy4xW1Ud5CgYIARAAGA4SNwF-L9IrWfaomed_CK0R7zZffcpT-GIXf3y2ZjqqAD0UP6UkbaMV9F_OEC6pBVaaX4TYnBKx3os", "token_expiry": "2024-01-30T11:20:53Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byBdrcgbmWKGyEUOxxuJBhxOs1uu0AqWeLgJKKKLG_dVg4iQKJAdiWD2oetHrKn17p4ZtfX-vt1VQ0BiF2MPD2exX1oESsQkXaO8q4TM1olIpadvlUBqUkqKJcjCqn1dp1oaTVYU-Srf2wQCGnDt3ozjljdkHXN_MQaCgYKAa4SARISFQHGX2MiWnixlrP3Se3vEV73_4fenA0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
|
||||
{"access_token": "ya29.a0AfB_byAZmDTDsYds_iatV8a30PUPWcDHVW4Cyg71pTlD0f3eBBwAjV4WpVL8LdAle8sT4j_rX4rWH8iCt3QI2YdrQLFPlaVdBk0zRGGtAEcebIDuQy_VKD6j5c3IGxok9PDON-Mft0ZVJjUVEopgLYA4fYwctbQZ8nyl4AaCgYKAX4SARISFQHGX2Mim-LRNXCfACmecJH94-D09A0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0ehtafHmucszRCgYIARAAGA4SNwF-L9IrpDBsnzdHKAlRfrkvzNFw1cpdnRY8rhM5gy4flsPYdysMav27yHamJx39BBGq-LLw40s", "token_expiry": "2024-01-31T14:41:25Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0AfB_byAZmDTDsYds_iatV8a30PUPWcDHVW4Cyg71pTlD0f3eBBwAjV4WpVL8LdAle8sT4j_rX4rWH8iCt3QI2YdrQLFPlaVdBk0zRGGtAEcebIDuQy_VKD6j5c3IGxok9PDON-Mft0ZVJjUVEopgLYA4fYwctbQZ8nyl4AaCgYKAX4SARISFQHGX2Mim-LRNXCfACmecJH94-D09A0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
|
||||
@@ -1,9 +1,9 @@
|
||||
import ctypes
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import func_timeout
|
||||
|
||||
from desktop_env.envs.desktop_env import DesktopEnv
|
||||
@@ -46,7 +46,6 @@ logger = logging.getLogger("desktopenv.experiment")
|
||||
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
|
||||
|
||||
|
||||
|
||||
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
|
||||
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
|
||||
env = DesktopEnv(
|
||||
@@ -123,9 +122,8 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
|
||||
logger.info("Environment closed.")
|
||||
|
||||
|
||||
def main(example_class, example_id):
|
||||
def main(example_class, example_id, gpt4_model="gpt-4-0125-preview"):
|
||||
action_space = "pyautogui"
|
||||
gpt4_model = "gpt-4-0125-preview"
|
||||
gemini_model = "gemini-pro-vision"
|
||||
|
||||
logger.info("Running example %s/%s", example_class, example_id)
|
||||
@@ -134,7 +132,7 @@ def main(example_class, example_id):
|
||||
|
||||
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
|
||||
example = json.load(f)
|
||||
example["snapshot"] = "exp_v1"
|
||||
example["snapshot"] = "exp_v5"
|
||||
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], max_tokens=1000,
|
||||
@@ -154,25 +152,50 @@ def main(example_class, example_id):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
os_list = [
|
||||
"94d95f96-9699-4208-98ba-3c3119edf9c2",
|
||||
"bedcedc4-4d72-425e-ad62-21960b11fe0d",
|
||||
"43c2d64c-bab5-4dcb-a30c-b888321c319a",
|
||||
"7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
|
||||
"ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3",
|
||||
"f9be0997-4b7c-45c5-b05c-4612b44a6118",
|
||||
"28cc3b7e-b194-4bc9-8353-d04c0f4d56d2",
|
||||
"5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
|
||||
"e0df059f-28a6-4169-924f-b9623e7184cc",
|
||||
"ddc75b62-7311-4af8-bfb3-859558542b36",
|
||||
"b6781586-6346-41cd-935a-a6b1487918fc",
|
||||
"3ce045a0-877b-42aa-8d2c-b4a863336ab8",
|
||||
"a4d98375-215b-4a4d-aee9-3d4370fccc41",
|
||||
"13584542-872b-42d8-b299-866967b5c3ef",
|
||||
"23393935-50c7-4a86-aeea-2b78fd089c5c"
|
||||
]
|
||||
|
||||
# for example_id in os_list:
|
||||
# try:
|
||||
# main("os", example_id, gpt4_model="gpt-3.5-turbo-16k")
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
|
||||
vlc_list = [
|
||||
# "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
|
||||
# "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
|
||||
# "8f080098-ddb1-424c-b438-4e96e5e4786e",
|
||||
# "bba3381f-b5eb-4439-bd9e-80c22218d5a7",
|
||||
# "fba2c100-79e8-42df-ae74-b592418d54f4",
|
||||
# "efcf0d81-0835-4880-b2fd-d866e8bc2294",
|
||||
# "8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
|
||||
# "aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
|
||||
# "386dbd0e-0241-4a0a-b6a2-6704fba26b1c",
|
||||
# "9195653c-f4aa-453d-aa95-787f6ccfaae9",
|
||||
# "d06f0d4d-2cd5-4ede-8de9-598629438c6e",
|
||||
# "a5bbbcd5-b398-4c91-83d4-55e1e31bbb81",
|
||||
"8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
|
||||
"8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
|
||||
"8f080098-ddb1-424c-b438-4e96e5e4786e",
|
||||
"bba3381f-b5eb-4439-bd9e-80c22218d5a7",
|
||||
"fba2c100-79e8-42df-ae74-b592418d54f4",
|
||||
"efcf0d81-0835-4880-b2fd-d866e8bc2294",
|
||||
"8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
|
||||
"aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
|
||||
"386dbd0e-0241-4a0a-b6a2-6704fba26b1c",
|
||||
"9195653c-f4aa-453d-aa95-787f6ccfaae9",
|
||||
"d06f0d4d-2cd5-4ede-8de9-598629438c6e",
|
||||
"a5bbbcd5-b398-4c91-83d4-55e1e31bbb81",
|
||||
"f3977615-2b45-4ac5-8bba-80c17dbe2a37",
|
||||
"215dfd39-f493-4bc3-a027-8a97d72c61bf"
|
||||
]
|
||||
|
||||
chrome_list = [
|
||||
# "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
|
||||
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
|
||||
"7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
|
||||
"06fe7178-4491-4589-810f-2e2bc9502122",
|
||||
"e1e75309-3ddb-4d09-92ec-de869c928143",
|
||||
@@ -215,5 +238,116 @@ if __name__ == '__main__':
|
||||
"4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
|
||||
]
|
||||
|
||||
for example_id in calc_list:
|
||||
main("libreoffice_calc", example_id)
|
||||
# for example_id in calc_list:
|
||||
# main("libreoffice_calc", example_id)
|
||||
|
||||
impress_list = [
|
||||
# "5d901039-a89c-4bfb-967b-bf66f4df075e",
|
||||
# "550ce7e7-747b-495f-b122-acdc4d0b8e54",
|
||||
# "455d3c66-7dc6-4537-a39a-36d3e9119df7",
|
||||
# "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
|
||||
# "c59742c0-4323-4b9d-8a02-723c251deaa0",
|
||||
# "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
|
||||
# "9ec204e4-f0a3-42f8-8458-b772a6797cab",
|
||||
# "0f84bef9-9790-432e-92b7-eece357603fb",
|
||||
# "ce88f674-ab7a-43da-9201-468d38539e4a",
|
||||
# "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
|
||||
# "a097acff-6266-4291-9fbd-137af7ecd439",
|
||||
# "bf4e9888-f10f-47af-8dba-76413038b73c",
|
||||
"21760ecb-8f62-40d2-8d85-0cee5725cb72"
|
||||
]
|
||||
# for example_id in impress_list:
|
||||
# main("libreoffice_impress", example_id)
|
||||
|
||||
thunderbird_list = [
|
||||
# "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
|
||||
# "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
|
||||
"12086550-11c0-466b-b367-1d9e75b3910e",
|
||||
"06fe7178-4491-4589-810f-2e2bc9502122",
|
||||
"6766f2b8-8a72-417f-a9e5-56fcaa735837",
|
||||
"e1e75309-3ddb-4d09-92ec-de869c928143",
|
||||
"3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5",
|
||||
"35253b65-1c19-4304-8aa4-6884b8218fc0",
|
||||
"d088f539-cab4-4f9a-ac92-9999fc3a656e",
|
||||
"2ad9387a-65d8-4e33-ad5b-7580065a27ca",
|
||||
"480bcfea-d68f-4aaa-a0a9-2589ef319381",
|
||||
"030eeff7-b492-4218-b312-701ec99ee0cc",
|
||||
"94760984-3ff5-41ee-8347-cf1af709fea0",
|
||||
"99146c54-4f37-4ab8-9327-5f3291665e1e",
|
||||
"c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
|
||||
]
|
||||
# for example_id in thunderbird_list:
|
||||
# main("thunderbird", example_id)
|
||||
|
||||
gimp_list = [
|
||||
"7a4deb26-d57d-4ea9-9a73-630f66a7b568",
|
||||
"554785e9-4523-4e7a-b8e1-8016f565f56a",
|
||||
"77b8ab4d-994f-43ac-8930-8ca087d7c4b4",
|
||||
"f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce",
|
||||
"d52d6308-ec58-42b7-a2c9-de80e4837b2b",
|
||||
"2a729ded-3296-423d-aec4-7dd55ed5fbb3",
|
||||
"b148e375-fe0b-4bec-90e7-38632b0d73c2",
|
||||
"a746add2-cab0-4740-ac36-c3769d9bfb46",
|
||||
"7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
|
||||
"d16c99dc-2a1e-46f2-b350-d97c86c85c15",
|
||||
"06ca5602-62ca-47f6-ad4f-da151cde54cc",
|
||||
"e2dd0213-26db-4349-abe5-d5667bfd725c",
|
||||
"f723c744-e62c-4ae6-98d1-750d3cd7d79d",
|
||||
"72f83cdc-bf76-4531-9a1b-eb893a13f8aa",
|
||||
"7767eef2-56a3-4cea-8c9f-48c070c7d65b",
|
||||
"734d6579-c07d-47a8-9ae2-13339795476b"
|
||||
]
|
||||
|
||||
# for example_id in gimp_list:
|
||||
# try:
|
||||
# main("gimp", example_id)
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
|
||||
vs_code_list = [
|
||||
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
|
||||
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
||||
"eabc805a-bfcf-4460-b250-ac92135819f6",
|
||||
"982d12a5-beab-424f-8d38-d2a48429e511",
|
||||
"4e60007a-f5be-4bfc-9723-c39affa0a6d3",
|
||||
"e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
|
||||
"9439a27b-18ae-42d8-9778-5f68f891805e",
|
||||
"ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
|
||||
"930fdb3b-11a8-46fe-9bac-577332e2640e",
|
||||
"276cc624-87ea-4f08-ab93-f770e3790175",
|
||||
"9d425400-e9b2-4424-9a4b-d4c7abac4140"
|
||||
]
|
||||
|
||||
# for example_id in vs_code_list:
|
||||
# try:
|
||||
# main("vs_code", example_id)
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
|
||||
multiple_list = [
|
||||
"f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
|
||||
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
|
||||
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
|
||||
"b52b40a5-ad70-4c53-b5b0-5650a8387052",
|
||||
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
|
||||
"2b9493d7-49b8-493a-a71b-56cd1f4d6908",
|
||||
"51f5801c-18b3-4f25-b0c3-02f85507a078",
|
||||
"2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
|
||||
"510f64c8-9bcc-4be1-8d30-638705850618",
|
||||
"937087b6-f668-4ba6-9110-60682ee33441",
|
||||
"ee9a3c83-f437-4879-8918-be5efbb9fac7",
|
||||
"3680a5ee-6870-426a-a997-eba929a0d25c",
|
||||
"e135df7c-7687-4ac0-a5f0-76b74438b53e",
|
||||
"58565672-7bfe-48ab-b828-db349231de6b",
|
||||
"2fe4b718-3bd7-46ec-bdce-b184f5653624"
|
||||
]
|
||||
|
||||
for example_id in multiple_list:
|
||||
try:
|
||||
main("multi_apps", example_id)
|
||||
except Exception as e:
|
||||
logger.error("An error occurred while running the example: %s", e)
|
||||
continue
|
||||
|
||||
|
||||
@@ -134,7 +134,7 @@ def main(example_class, example_id):
|
||||
|
||||
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
|
||||
example = json.load(f)
|
||||
example["snapshot"] = "exp_v1"
|
||||
example["snapshot"] = "exp_v5"
|
||||
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space,
|
||||
@@ -168,17 +168,17 @@ if __name__ == '__main__':
|
||||
"af630914-714e-4a24-a7bb-f9af687d3b91"
|
||||
]
|
||||
calc_list = [
|
||||
"eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
|
||||
"0bf05a7d-b28b-44d2-955a-50b41e24012a",
|
||||
"7a4e4bc8-922c-4c84-865c-25ba34136be1",
|
||||
"2bd59342-0664-4ccb-ba87-79379096cc08",
|
||||
"ecb0df7a-4e8d-4a03-b162-053391d3afaf",
|
||||
"7efeb4b1-3d19-4762-b163-63328d66303b",
|
||||
"4e6fcf72-daf3-439f-a232-c434ce416af6",
|
||||
"6054afcb-5bab-4702-90a0-b259b5d3217c",
|
||||
"abed40dc-063f-4598-8ba5-9fe749c0615d",
|
||||
"01b269ae-2111-4a07-81fd-3fcd711993b0",
|
||||
"8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
|
||||
# "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
|
||||
# "0bf05a7d-b28b-44d2-955a-50b41e24012a",
|
||||
# "7a4e4bc8-922c-4c84-865c-25ba34136be1",
|
||||
# "2bd59342-0664-4ccb-ba87-79379096cc08",
|
||||
# "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
|
||||
# "7efeb4b1-3d19-4762-b163-63328d66303b",
|
||||
# "4e6fcf72-daf3-439f-a232-c434ce416af6",
|
||||
# "6054afcb-5bab-4702-90a0-b259b5d3217c",
|
||||
# "abed40dc-063f-4598-8ba5-9fe749c0615d",
|
||||
# "01b269ae-2111-4a07-81fd-3fcd711993b0",
|
||||
# "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
|
||||
"0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
|
||||
"4188d3a4-077d-46b7-9c86-23e1a036f6c1",
|
||||
"51b11269-2ca8-4b2a-9163-f21758420e78",
|
||||
@@ -197,5 +197,97 @@ if __name__ == '__main__':
|
||||
"4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
|
||||
]
|
||||
|
||||
for example_id in calc_list:
|
||||
main("libreoffice_calc", example_id)
|
||||
# for example_id in calc_list:
|
||||
# main("libreoffice_calc", example_id)
|
||||
|
||||
impress_list = [
|
||||
# "5d901039-a89c-4bfb-967b-bf66f4df075e",
|
||||
# "550ce7e7-747b-495f-b122-acdc4d0b8e54",
|
||||
# "455d3c66-7dc6-4537-a39a-36d3e9119df7",
|
||||
# "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
|
||||
# "c59742c0-4323-4b9d-8a02-723c251deaa0",
|
||||
# "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
|
||||
# "9ec204e4-f0a3-42f8-8458-b772a6797cab",
|
||||
# "0f84bef9-9790-432e-92b7-eece357603fb",
|
||||
# "ce88f674-ab7a-43da-9201-468d38539e4a",
|
||||
# "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
|
||||
# "a097acff-6266-4291-9fbd-137af7ecd439",
|
||||
# "bf4e9888-f10f-47af-8dba-76413038b73c",
|
||||
"21760ecb-8f62-40d2-8d85-0cee5725cb72"
|
||||
]
|
||||
# for example_id in impress_list:
|
||||
# main("libreoffice_impress", example_id)
|
||||
|
||||
# gimp_list = [
|
||||
# "7a4deb26-d57d-4ea9-9a73-630f66a7b568",
|
||||
# "554785e9-4523-4e7a-b8e1-8016f565f56a",
|
||||
# "77b8ab4d-994f-43ac-8930-8ca087d7c4b4",
|
||||
# "f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce",
|
||||
# "d52d6308-ec58-42b7-a2c9-de80e4837b2b",
|
||||
# "2a729ded-3296-423d-aec4-7dd55ed5fbb3",
|
||||
# "b148e375-fe0b-4bec-90e7-38632b0d73c2",
|
||||
# "a746add2-cab0-4740-ac36-c3769d9bfb46",
|
||||
# "7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
|
||||
# "d16c99dc-2a1e-46f2-b350-d97c86c85c15",
|
||||
# "06ca5602-62ca-47f6-ad4f-da151cde54cc",
|
||||
# "e2dd0213-26db-4349-abe5-d5667bfd725c",
|
||||
# "f723c744-e62c-4ae6-98d1-750d3cd7d79d",
|
||||
# "72f83cdc-bf76-4531-9a1b-eb893a13f8aa",
|
||||
# "7767eef2-56a3-4cea-8c9f-48c070c7d65b",
|
||||
# "734d6579-c07d-47a8-9ae2-13339795476b"
|
||||
# ]
|
||||
#
|
||||
# for example_id in gimp_list:
|
||||
# try:
|
||||
# main("gimp", example_id)
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
#
|
||||
|
||||
vs_code_list = [
|
||||
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
|
||||
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
||||
"eabc805a-bfcf-4460-b250-ac92135819f6",
|
||||
"982d12a5-beab-424f-8d38-d2a48429e511",
|
||||
"4e60007a-f5be-4bfc-9723-c39affa0a6d3",
|
||||
"e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
|
||||
"9439a27b-18ae-42d8-9778-5f68f891805e",
|
||||
"ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
|
||||
"930fdb3b-11a8-46fe-9bac-577332e2640e",
|
||||
"276cc624-87ea-4f08-ab93-f770e3790175",
|
||||
"9d425400-e9b2-4424-9a4b-d4c7abac4140"
|
||||
]
|
||||
|
||||
# for example_id in vs_code_list:
|
||||
# try:
|
||||
# main("vs_code", example_id)
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
|
||||
multiple_list = [
|
||||
"f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
|
||||
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
|
||||
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
|
||||
"b52b40a5-ad70-4c53-b5b0-5650a8387052",
|
||||
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
|
||||
"2b9493d7-49b8-493a-a71b-56cd1f4d6908",
|
||||
"51f5801c-18b3-4f25-b0c3-02f85507a078",
|
||||
"2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
|
||||
"510f64c8-9bcc-4be1-8d30-638705850618",
|
||||
"937087b6-f668-4ba6-9110-60682ee33441",
|
||||
"ee9a3c83-f437-4879-8918-be5efbb9fac7",
|
||||
"3680a5ee-6870-426a-a997-eba929a0d25c",
|
||||
"e135df7c-7687-4ac0-a5f0-76b74438b53e",
|
||||
"58565672-7bfe-48ab-b828-db349231de6b",
|
||||
"2fe4b718-3bd7-46ec-bdce-b184f5653624"
|
||||
]
|
||||
|
||||
for example_id in multiple_list:
|
||||
try:
|
||||
main("multi_apps", example_id)
|
||||
except Exception as e:
|
||||
logger.error("An error occurred while running the example: %s", e)
|
||||
continue
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import ctypes
|
||||
import datetime
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
|
||||
import func_timeout
|
||||
|
||||
from desktop_env.envs.desktop_env import DesktopEnv
|
||||
@@ -124,12 +124,11 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
|
||||
logger.info("Environment closed.")
|
||||
|
||||
|
||||
def main(example_class, example_id):
|
||||
def main(example_class, example_id, gpt4_model="gpt-4-vision-preview"):
|
||||
action_space = "pyautogui"
|
||||
# example_class = "libreoffice_calc"
|
||||
# example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
|
||||
# example_id = "01b269ae-2111-4a07-81fd-3fcd711993b0"
|
||||
gpt4_model = "gpt-4-vision-preview"
|
||||
gemini_model = "gemini-pro-vision"
|
||||
|
||||
logger.info("Running example %s/%s", example_class, example_id)
|
||||
@@ -138,7 +137,7 @@ def main(example_class, example_id):
|
||||
|
||||
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
|
||||
example = json.load(f)
|
||||
example["snapshot"] = "exp_v1"
|
||||
example["snapshot"] = "exp_v5"
|
||||
# example["snapshot"] = "exp_setup4"
|
||||
# example["snapshot"] = "Snapshot 30"
|
||||
|
||||
@@ -160,7 +159,133 @@ def main(example_class, example_id):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
xx_list = [
|
||||
os_list = [
|
||||
"94d95f96-9699-4208-98ba-3c3119edf9c2",
|
||||
"bedcedc4-4d72-425e-ad62-21960b11fe0d",
|
||||
"43c2d64c-bab5-4dcb-a30c-b888321c319a",
|
||||
"7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
|
||||
"ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3",
|
||||
"f9be0997-4b7c-45c5-b05c-4612b44a6118",
|
||||
"28cc3b7e-b194-4bc9-8353-d04c0f4d56d2",
|
||||
"5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
|
||||
"e0df059f-28a6-4169-924f-b9623e7184cc",
|
||||
"ddc75b62-7311-4af8-bfb3-859558542b36",
|
||||
"b6781586-6346-41cd-935a-a6b1487918fc",
|
||||
"3ce045a0-877b-42aa-8d2c-b4a863336ab8",
|
||||
"a4d98375-215b-4a4d-aee9-3d4370fccc41",
|
||||
"13584542-872b-42d8-b299-866967b5c3ef",
|
||||
"23393935-50c7-4a86-aeea-2b78fd089c5c"
|
||||
]
|
||||
for example_id in xx_list:
|
||||
main("xx", example_id)
|
||||
|
||||
# for example_id in os_list:
|
||||
# try:
|
||||
# main("os", example_id)
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
|
||||
calc_list = [
|
||||
# "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
|
||||
# "0bf05a7d-b28b-44d2-955a-50b41e24012a",
|
||||
# "7a4e4bc8-922c-4c84-865c-25ba34136be1",
|
||||
# "2bd59342-0664-4ccb-ba87-79379096cc08",
|
||||
# "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
|
||||
# "7efeb4b1-3d19-4762-b163-63328d66303b",
|
||||
# "4e6fcf72-daf3-439f-a232-c434ce416af6",
|
||||
# "6054afcb-5bab-4702-90a0-b259b5d3217c",
|
||||
# "abed40dc-063f-4598-8ba5-9fe749c0615d",
|
||||
# "01b269ae-2111-4a07-81fd-3fcd711993b0",
|
||||
# "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
|
||||
# "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
|
||||
# "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
|
||||
# "51b11269-2ca8-4b2a-9163-f21758420e78",
|
||||
# "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
|
||||
# "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
|
||||
# "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
|
||||
# "3aaa4e37-dc91-482e-99af-132a612d40f3",
|
||||
# "37608790-6147-45d0-9f20-1137bb35703d",
|
||||
# "f9584479-3d0d-4c79-affa-9ad7afdd8850",
|
||||
"d681960f-7bc3-4286-9913-a8812ba3261a",
|
||||
"21df9241-f8d7-4509-b7f1-37e501a823f7",
|
||||
"1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
|
||||
"357ef137-7eeb-4c80-a3bb-0951f26a8aff",
|
||||
"aa3a8974-2e85-438b-b29e-a64df44deb4b",
|
||||
"a01fbce3-2793-461f-ab86-43680ccbae25",
|
||||
"4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
|
||||
]
|
||||
|
||||
# for example_id in calc_list:
|
||||
# try:
|
||||
# main("libreoffice_calc", example_id)
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
|
||||
impress_list = [
|
||||
"5d901039-a89c-4bfb-967b-bf66f4df075e",
|
||||
"550ce7e7-747b-495f-b122-acdc4d0b8e54",
|
||||
"455d3c66-7dc6-4537-a39a-36d3e9119df7",
|
||||
"af23762e-2bfd-4a1d-aada-20fa8de9ce07",
|
||||
"c59742c0-4323-4b9d-8a02-723c251deaa0",
|
||||
"ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
|
||||
"9ec204e4-f0a3-42f8-8458-b772a6797cab",
|
||||
"0f84bef9-9790-432e-92b7-eece357603fb",
|
||||
"ce88f674-ab7a-43da-9201-468d38539e4a",
|
||||
"3b27600c-3668-4abd-8f84-7bcdebbccbdb",
|
||||
"a097acff-6266-4291-9fbd-137af7ecd439",
|
||||
"bf4e9888-f10f-47af-8dba-76413038b73c",
|
||||
"21760ecb-8f62-40d2-8d85-0cee5725cb72"
|
||||
]
|
||||
|
||||
# for example_id in impress_list:
|
||||
# try:
|
||||
# main("libreoffice_impress", example_id)
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
|
||||
vs_code_list = [
|
||||
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
|
||||
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
||||
"eabc805a-bfcf-4460-b250-ac92135819f6",
|
||||
"982d12a5-beab-424f-8d38-d2a48429e511",
|
||||
"4e60007a-f5be-4bfc-9723-c39affa0a6d3",
|
||||
"e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
|
||||
"9439a27b-18ae-42d8-9778-5f68f891805e",
|
||||
"ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
|
||||
"930fdb3b-11a8-46fe-9bac-577332e2640e",
|
||||
"276cc624-87ea-4f08-ab93-f770e3790175",
|
||||
"9d425400-e9b2-4424-9a4b-d4c7abac4140"
|
||||
]
|
||||
|
||||
# for example_id in vs_code_list:
|
||||
# try:
|
||||
# main("vs_code", example_id)
|
||||
# except Exception as e:
|
||||
# logger.error("An error occurred while running the example: %s", e)
|
||||
# continue
|
||||
|
||||
multiple_list = [
|
||||
"f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
|
||||
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
|
||||
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
|
||||
"b52b40a5-ad70-4c53-b5b0-5650a8387052",
|
||||
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
|
||||
"2b9493d7-49b8-493a-a71b-56cd1f4d6908",
|
||||
"51f5801c-18b3-4f25-b0c3-02f85507a078",
|
||||
"2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
|
||||
"510f64c8-9bcc-4be1-8d30-638705850618",
|
||||
"937087b6-f668-4ba6-9110-60682ee33441",
|
||||
"ee9a3c83-f437-4879-8918-be5efbb9fac7",
|
||||
"3680a5ee-6870-426a-a997-eba929a0d25c",
|
||||
"e135df7c-7687-4ac0-a5f0-76b74438b53e",
|
||||
"58565672-7bfe-48ab-b828-db349231de6b",
|
||||
"2fe4b718-3bd7-46ec-bdce-b184f5653624"
|
||||
]
|
||||
|
||||
for example_id in multiple_list:
|
||||
try:
|
||||
main("multi_apps", example_id)
|
||||
except Exception as e:
|
||||
logger.error("An error occurred while running the example: %s", e)
|
||||
continue
|
||||
|
||||
@@ -129,7 +129,7 @@ def main(example_class, example_id):
|
||||
|
||||
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
|
||||
example = json.load(f)
|
||||
example["snapshot"] = "exp_v1"
|
||||
example["snapshot"] = "exp_v5"
|
||||
|
||||
api_key = os.environ.get("OPENAI_API_KEY")
|
||||
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
|
||||
|
||||
@@ -39,3 +39,4 @@ fastdtw
|
||||
odfpy
|
||||
openai
|
||||
func-timeout
|
||||
beautifulsoup4
|
||||
Reference in New Issue
Block a user