diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py
index 43492f4..b0f9b5b 100644
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -4,6 +4,7 @@ import os.path
 import time
 import traceback
 import uuid
+import tempfile
 from typing import Any, Union, Optional
 from typing import Dict, List
 
@@ -431,29 +432,77 @@ class SetupController:
             for delete:
                 query(str): query pattern string to search files or folder in google drive to delete, please refer to
                     https://developers.google.com/drive/api/guides/search-files?hl=en about how to write query string.
-                    By default, move all files/folders into trash (can be recovered).
-                trash(bool): whether to delete files permanently or move to trash. By default, trash=True, just move to trash.
-                TODO: other operations
+                    By default (empty query), all files and folders are matched.
+                trash(bool): whether to move to trash (True) or delete permanently (False). By default, trash=False,
+                    i.e. the matched files/folders are permanently deleted.
+            for mkdirs:
+                path(List[str]): nested folder path to create in the google drive, e.g. ['folder', 'subfolder']
+            for upload:
+                url(str): remote url to download file
+                path(List[str]): the path in the google drive to store the downloaded file, the last element is the file name
         """
         settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.yml')
         gauth = GoogleAuth(settings_file=settings_file)
         drive = GoogleDrive(gauth)
 
+        def mkdir_in_googledrive(paths: List[str]):
+            """ Create the nested folders `paths` (one entry per level, starting from root) in google drive, reusing
+            folders that already exist, and return the id of the innermost folder ('root' if `paths` is empty).
+            """
+            paths = paths if isinstance(paths, list) else [paths]
+            parent_id = 'root'
+            for p in paths:
+                q = f"'{parent_id}' in parents and title = '{p}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false"
+                folder = drive.ListFile({'q': q}).GetList()
+                if len(folder) == 0: # not exists, create it
+                    parents = {} if parent_id == 'root' else {'parents': [{'id': parent_id}]}
+                    file = drive.CreateFile({'title': p, 'mimeType':'application/vnd.google-apps.folder', **parents})
+                    file.Upload()
+                    parent_id = file['id']
+                else: parent_id = folder[0]['id']
+            return parent_id
+
         for oid, operation in enumerate(config['operation']):
             if operation == 'delete': # delete a specific file
-                # query pattern string, by default, remove all files/folders not in the trash to the trash
+                # query pattern string, by default (empty query), permanently delete all files/folders
                 params = config['args'][oid]
-                q = params.get('query', 'trashed = false')
-                trash = params.get('trash', True)
-                filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList()
-                for file in filelist:
+                q = params.get('query', '')
+                trash = params.get('trash', False)
+                q_file = f"( {q} ) and mimeType != 'application/vnd.google-apps.folder'" if q.strip() else "mimeType != 'application/vnd.google-apps.folder'"
+                filelist: GoogleDriveFileList = drive.ListFile({'q': q_file}).GetList()
+                q_folder = f"( {q} ) and mimeType = 'application/vnd.google-apps.folder'" if q.strip() else "mimeType = 'application/vnd.google-apps.folder'"
+                folderlist: GoogleDriveFileList = drive.ListFile({'q': q_folder}).GetList()
+                for file in filelist: # first delete file, then folder
                     file: GoogleDriveFile
-                    # note that, if a folder is trashed/deleted, all files and folders in it will be trashed/deleted
-                    # this is the same for UnTrash
                     if trash: file.Trash()
                     else: file.Delete()
+                for folder in folderlist:
+                    folder: GoogleDriveFile
+                    # note that, if a folder is trashed/deleted, all files and folders in it will be trashed/deleted
+                    if trash: folder.Trash()
+                    else: folder.Delete()
+            elif operation == 'mkdirs':
+                params = config['args'][oid]
+                mkdir_in_googledrive(params['path'])
             elif operation == 'upload':
-                pass
+                params = config['args'][oid]
+                url = params['url']
+                # accept both a single file name and a list of path components
+                paths = params['path'] if isinstance(params['path'], list) else [params['path']]
+                # NOTE(review): delete=False keeps the temp file on disk so it can be re-opened by name for the upload;
+                # it is not removed afterwards -- TODO confirm whether cleanup is needed
+                with tempfile.NamedTemporaryFile(mode='wb', delete=False) as tmpf:
+                    response = requests.get(url, stream=True)
+                    response.raise_for_status()
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            tmpf.write(chunk)
+                # the temp file is closed (and flushed) by the with-block before it is re-opened for uploading
+                parent_id = mkdir_in_googledrive(paths[:-1])
+                parents = {} if parent_id == 'root' else {'parents': [{'id': parent_id}]}
+                file = drive.CreateFile({'title': paths[-1], **parents})
+                file.SetContentFile(tmpf.name)
+                file.Upload()
             else:
                 raise ValueError('[ERROR]: not implemented clean type!')
 
diff --git a/desktop_env/evaluators/getters/__init__.py b/desktop_env/evaluators/getters/__init__.py
index 0ce7ea7..c310769 100644
--- a/desktop_env/evaluators/getters/__init__.py
+++ b/desktop_env/evaluators/getters/__init__.py
@@ -1,6 +1,6 @@
 from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \
     get_shortcuts_on_desktop, get_history, get_enabled_experiments, get_chrome_language, get_chrome_font_size, \
-    get_profile_name, get_number_of_search_results, get_googledrive_file
+    get_profile_name, get_number_of_search_results, get_googledrive_file, get_active_tab_info
 from .file import get_cloud_file, get_vm_file, get_cache_file
 from .general import get_vm_command_line
 from .impress import get_audio_in_slide
diff --git a/desktop_env/evaluators/getters/chrome.py b/desktop_env/evaluators/getters/chrome.py
index 2724381..89145c1 100644
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -466,44 +466,36 @@ def get_number_of_search_results(env, config: Dict[str, str]):
 
 def get_googledrive_file(env, config: Dict[str, Any]) -> str:
     """ Get the desired file from Google Drive based on config, return the downloaded local filepath.
+    To retrieve the file, we provide two options in config dict:
+    1. query: a query string or a list of query strings in Google Drive search format; the queries are applied level by level starting from the root folder, each one searching inside the item matched by the previous one
+    2. path: a file name or a list of path components, 'folder/subfolder/filename' -> ['folder', 'subfolder', 'filename']
+    Return the downloaded local filepath, or None if any level is not found or the download fails.
     """
     settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json')
     auth = GoogleAuth(settings_file=settings_file)
     drive = GoogleDrive(auth)
-
-    q = config['query']
-    filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList()
-    if len(filelist) == 0: # target file not found
-        return None
-
-    file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just download the first one
     _path = os.path.join(env.cache_dir, config['dest'])
 
-    try:
-        file.GetContentFile(_path, mimetype=file.metadata['mimeType'])
-    except:
-        logger.info('[ERROR]: Failed to download the file from Google Drive')
-        return None
-    return _path
-
-
-def get_googledrive_file(env, config: Dict[str, Any]) -> str:
-    """ Get the desired file from Google Drive based on config, return the downloaded local filepath.
-    """
-    settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json')
-    auth = GoogleAuth(settings_file=settings_file)
-    drive = GoogleDrive(auth)
-
-    q = config['query']
-    filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList()
-    if len(filelist) == 0: # target file not found
-        return None
-
-    file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just download the first one
-    _path = os.path.join(env.cache_dir, config['dest'])
+    if 'query' in config:
+        query = config['query']
+        if not isinstance(query, list): query = [query]
+    else:
+        paths = config['path']
+        if not isinstance(paths, list): paths = [paths]
+        query = [f"title = '{fp}' and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(paths) - 1
+            else f"title = '{fp}' and trashed = false" for idx, fp in enumerate(paths)]
+    parent_id = 'root'
 
     try:
-        file.GetContentFile(_path, mimetype=file.metadata['mimeType'])
+        for q in query:
+            search = f"( {q} ) and '{parent_id}' in parents"
+            filelist: GoogleDriveFileList = drive.ListFile({'q': search}).GetList()
+            if len(filelist) == 0: # target file not found
+                return None
+            file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just use the first one
+            parent_id = file['id']
+
+        file.GetContentFile(_path, mimetype=file['mimeType'])
-    except:
+    except Exception:
         logger.info('[ERROR]: Failed to download the file from Google Drive')
         return None
diff --git a/evaluation_examples/examples/multi_apps/4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc.json b/evaluation_examples/examples/multi_apps/4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc.json
new file mode 100644
index 0000000..f0b6a9e
--- /dev/null
+++ b/evaluation_examples/examples/multi_apps/4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc.json
@@ -0,0 +1,88 @@
+{
+    "id": "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
+    "snapshot": "chrome",
+    "instruction": "Could you help me extract data in the table from a new invoice uploaded to my Google Drive, then export it to a Libreoffice calc .xlsx file in the desktop?",
+    "source": "https://marketplace.uipath.com/listings/extract-data-from-a-new-invoice-file-in-google-drive-and-store-it-in-google-sheets4473",
+    "config": [
+        {
+            "type": "googledrive",
+            "parameters": {
+                "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
+                "operation": ["delete", "upload"],
+                "args": [
+                    {
+                        "query": "title = 'invoice.pdf'",
+                        "trash": false
+                    },
+                    {
+                        "url": "https://drive.usercontent.google.com/download?id=1KAhoPFM0AU2dgn_NRt3y7CjOr9Er4vwD&export=download&authuser=0&confirm=t&uuid=e8528cd1-5106-45f3-a644-e1bbf5e08278&at=APZUnTUnTuXfV2Ted_9Wv2QomMvA:1706181110208",
+                        "path": ["invoice.pdf"]
+                    }
+                ]
+            }
+        },
+        {
+            "type": "launch",
+            "parameters": {
+                "command": [
+                    "google-chrome",
+                    "--remote-debugging-port=1337"
+                ]
+            }
+        },
+        {
+            "type": "launch",
+            "parameters": {
+                "command": [
+                    "socat",
+                    "tcp-listen:9222,fork",
+                    "tcp:localhost:1337"
+                ]
+            }
+        },
+        {
+            "type": "chrome_open_tabs",
+            "parameters": {
+                "urls_to_open": [
+                    "https://www.freerice.com/",
+                    "https://www.hku.hk/",
+                    "https://about.meta.com/technologies/facebook-app/"
+                ]
+            }
+        },
+        {
+            "type": "login",
+            "parameters": {
+                "settings_file": "evaluation_examples/settings/google/settings.json",
+                "platform": "googledrive"
+            }
+        }
+    ],
+    "trajectory": "trajectories/",
+    "related_apps": [
+        "libreoffice_calc",
+        "chrome"
+    ],
+    "evaluator": {
+        "func": "compare_table",
+        "result": {
+            "type": "vm_file",
+            "path": "/home/user/Desktop/invoice.xlsx",
+            "dest": "invoice.xlsx"
+        },
+        "expected": {
+            "type": "cloud_file",
+            "path": "https://drive.usercontent.google.com/download?id=1gkATnr8bk4JKQbzXZvzifoAQUA2sx5da&export=download&authuser=0&confirm=t&uuid=64ed0549-1627-49e8-8228-1e1925d6f6f7&at=APZUnTXkCm24SrOPuO5C6v4M3BiB:1706181091638",
+            "dest": "invoice_gold.xlsx"
+        },
+        "options": {
+            "rules": [
+                {
+                    "type": "sheet_data",
+                    "sheet_idx0": "RI0",
+                    "sheet_idx1": "EI0"
+                }
+            ]
+        }
+    }
+}
\ No newline at end of file
diff --git a/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json b/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json
index 6bf6538..61d8bea 100644
--- a/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json
+++ b/evaluation_examples/examples/multi_apps/897e3b53-5d4d-444b-85cb-2cdc8a97d903.json
@@ -1,7 +1,7 @@
 {
     "id": "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
     "snapshot": "chrome",
-    "instruction": "I have a LibreOffice Writer file form.docx on the desktop. Help me convert it to PDF format and store it in the forms/ folder in Google Drive.",
+    "instruction": "I have a LibreOffice Writer file form.docx on the desktop. Help me convert it to PDF format and store it in the forms/ folder in my Google Drive.",
     "source": "https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive",
     "config": [
         {
@@ -11,7 +11,7 @@
                 "operation": ["delete"],
                 "args": [
                     {
-                        "query": "title = 'form.pdf' or title = 'form.docx' or title = 'form.docx.pdf'",
+                        "query": "title = 'form.pdf' or title = 'form.docx' or title = 'form.docx.pdf' or title = 'forms'",
                         "trash": false
                     }
                 ]
@@ -81,7 +81,10 @@
         "result": {
             "type": "googledrive_file",
             "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
-            "query": "( title = 'form.pdf' or title = 'form.docx.pdf' ) and trashed = false",
+            "query": [
+                "title = 'forms' and mimeType = 'application/vnd.google-apps.folder' and trashed = false",
+                "( title = 'form.pdf' or title = 'form.docx.pdf' ) and trashed = false"
+            ],
             "dest": "form.pdf"
         },
         "expected": {