Merge remote-tracking branch 'origin/main'

This commit is contained in:
Timothyxxx
2024-01-26 00:15:16 +08:00
5 changed files with 167 additions and 44 deletions

View File

@@ -4,6 +4,7 @@ import os.path
import time import time
import traceback import traceback
import uuid import uuid
import tempfile
from typing import Any, Union, Optional from typing import Any, Union, Optional
from typing import Dict, List from typing import Dict, List
@@ -431,29 +432,68 @@ class SetupController:
for delete: for delete:
query(str): query pattern string to search files or folder in google drive to delete, please refer to query(str): query pattern string to search files or folder in google drive to delete, please refer to
https://developers.google.com/drive/api/guides/search-files?hl=en about how to write query string. https://developers.google.com/drive/api/guides/search-files?hl=en about how to write query string.
By default, move all files/folders into trash (can be recovered). trash(bool): whether to delete files permanently or move to trash. By default, trash=false, completely delete it.
trash(bool): whether to delete files permanently or move to trash. By default, trash=True, just move to trash. for upload:
TODO: other operations path(str): remote url to download file
dest(List[str]): the path in the google drive to store the downloaded file
""" """
settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.yml') settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.yml')
gauth = GoogleAuth(settings_file=settings_file) gauth = GoogleAuth(settings_file=settings_file)
drive = GoogleDrive(gauth) drive = GoogleDrive(gauth)
def mkdir_in_googledrive(paths: List[str]):
    """Ensure a chain of nested folders exists in Google Drive.

    Walks `paths` from the Drive root downwards, reusing a folder when one
    with the requested title already exists (and is not trashed) under the
    current parent, and creating it otherwise. Uses the `drive` client from
    the enclosing scope.

    Args:
        paths: folder names ordered from the top-level folder to the deepest
            one. A single name that is not wrapped in a list/tuple is treated
            as a one-element chain.

    Returns:
        The id of the last folder in the chain, or 'root' when `paths` is
        empty.
    """
    folder_mime = 'application/vnd.google-apps.folder'
    names = list(paths) if isinstance(paths, (list, tuple)) else [paths]
    parent_id = 'root'
    for name in names:
        # Escape backslashes and single quotes so a folder name cannot break
        # out of the quoted value in the search query.
        escaped = str(name).replace('\\', '\\\\').replace("'", "\\'")
        q = (f"'{parent_id}' in parents and title = '{escaped}' "
             f"and mimeType = '{folder_mime}' and trashed = false")
        matches = drive.ListFile({'q': q}).GetList()
        if matches:
            # HACK: if several folders share this title, reuse the first one
            parent_id = matches[0]['id']
            continue
        # not exists, create it; the parent is only attached when it is not
        # the Drive root
        metadata = {'title': name, 'mimeType': folder_mime}
        if parent_id != 'root':
            metadata['parents'] = [{'id': parent_id}]
        folder = drive.CreateFile(metadata)
        folder.Upload()
        parent_id = folder['id']
    return parent_id
for oid, operation in enumerate(config['operation']): for oid, operation in enumerate(config['operation']):
if operation == 'delete': # delete a specific file if operation == 'delete': # delete a specific file
# query pattern string, by default, remove all files/folders not in the trash to the trash # query pattern string, by default, remove all files/folders not in the trash to the trash
params = config['args'][oid] params = config['args'][oid]
q = params.get('query', 'trashed = false') q = params.get('query', '')
trash = params.get('trash', True) trash = params.get('trash', False)
filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList() q_file = f"( {q} ) and mimeType != 'application/vnd.google-apps.folder'" if q.strip() else "mimeType != 'application/vnd.google-apps.folder'"
for file in filelist: filelist: GoogleDriveFileList = drive.ListFile({'q': q_file}).GetList()
q_folder = f"( {q} ) and mimeType = 'application/vnd.google-apps.folder'" if q.strip() else "mimeType = 'application/vnd.google-apps.folder'"
folderlist: GoogleDriveFileList = drive.ListFile({'q': q_folder}).GetList()
for file in filelist: # first delete file, then folder
file: GoogleDriveFile file: GoogleDriveFile
# note that, if a folder is trashed/deleted, all files and folders in it will be trashed/deleted
# this is the same for UnTrash
if trash: file.Trash() if trash: file.Trash()
else: file.Delete() else: file.Delete()
for folder in folderlist:
folder: GoogleDriveFile
# note that, if a folder is trashed/deleted, all files and folders in it will be trashed/deleted
if trash: folder.Trash()
else: folder.Delete()
elif operation == 'mkdirs':
params = config['args'][oid]
mkdir_in_googledrive(params['path'])
elif operation == 'upload': elif operation == 'upload':
pass params = config['args'][oid]
url = params['url']
with tempfile.NamedTemporaryFile(mode='wb', delete=False) as tmpf:
response = requests.get(url, stream=True)
response.raise_for_status()
for chunk in response.iter_content(chunk_size=8192):
if chunk:
tmpf.write(chunk)
tmpf.close()
paths = [params['path']] if params['path'] != list else params['path']
parent_id = mkdir_in_googledrive(paths[:-1])
parents = {} if parent_id == 'root' else {'parents': [{'id': parent_id}]}
file = drive.CreateFile({'title': paths[-1], **parents})
file.SetContentFile(tmpf.name)
file.Upload()
return
else: else:
raise ValueError('[ERROR]: not implemented clean type!') raise ValueError('[ERROR]: not implemented clean type!')

View File

@@ -1,6 +1,6 @@
from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \ from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \
get_shortcuts_on_desktop, get_history, get_enabled_experiments, get_chrome_language, get_chrome_font_size, \ get_shortcuts_on_desktop, get_history, get_enabled_experiments, get_chrome_language, get_chrome_font_size, \
get_profile_name, get_number_of_search_results, get_googledrive_file get_profile_name, get_number_of_search_results, get_googledrive_file, get_active_tab_info
from .file import get_cloud_file, get_vm_file, get_cache_file from .file import get_cloud_file, get_vm_file, get_cache_file
from .general import get_vm_command_line from .general import get_vm_command_line
from .impress import get_audio_in_slide from .impress import get_audio_in_slide

View File

@@ -466,44 +466,36 @@ def get_number_of_search_results(env, config: Dict[str, str]):
def get_googledrive_file(env, config: Dict[str, Any]) -> str: def get_googledrive_file(env, config: Dict[str, Any]) -> str:
""" Get the desired file from Google Drive based on config, return the downloaded local filepath. """ Get the desired file from Google Drive based on config, return the downloaded local filepath.
To retrieve the file, we provide two options in config dict:
1. query: a list of queries to search the file, each query is a string that follows the format of Google Drive search query
2. path: a list of path to the file, 'folder/subfolder/filename' -> ['folder', 'subfolder', 'filename']
Return the downloaded filepath locally.
""" """
settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json') settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json')
auth = GoogleAuth(settings_file=settings_file) auth = GoogleAuth(settings_file=settings_file)
drive = GoogleDrive(auth) drive = GoogleDrive(auth)
q = config['query']
filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList()
if len(filelist) == 0: # target file not found
return None
file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just download the first one
_path = os.path.join(env.cache_dir, config['dest']) _path = os.path.join(env.cache_dir, config['dest'])
try: if 'query' in config:
file.GetContentFile(_path, mimetype=file.metadata['mimeType']) query = config['query']
except: if type(query) != list: query = [query]
logger.info('[ERROR]: Failed to download the file from Google Drive') else:
return None paths = config['path']
return _path if type(paths) != list: paths = [paths]
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(paths) - 1
else f'title = {fp} and trashed = false' for idx, fp in enumerate(paths)]
def get_googledrive_file(env, config: Dict[str, Any]) -> str: parent_id = 'root'
""" Get the desired file from Google Drive based on config, return the downloaded local filepath.
"""
settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json')
auth = GoogleAuth(settings_file=settings_file)
drive = GoogleDrive(auth)
q = config['query']
filelist: GoogleDriveFileList = drive.ListFile({'q': q}).GetList()
if len(filelist) == 0: # target file not found
return None
file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just download the first one
_path = os.path.join(env.cache_dir, config['dest'])
try: try:
file.GetContentFile(_path, mimetype=file.metadata['mimeType']) for q in query:
search = f'( {q} ) and "{parent_id}" in parents'
filelist: GoogleDriveFileList = drive.ListFile({'q': search}).GetList()
if len(filelist) == 0: # target file not found
return None
file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just use the first one
parent_id = file['id']
file.GetContentFile(_path, mimetype=file['mimeType'])
except: except:
logger.info('[ERROR]: Failed to download the file from Google Drive') logger.info('[ERROR]: Failed to download the file from Google Drive')
return None return None

View File

@@ -0,0 +1,88 @@
{
"id": "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
"snapshot": "chrome",
"instruction": "Could you help me extract data in the table from a new invoice uploaded to my Google Drive, then export it to a Libreoffice calc .xlsx file in the desktop?",
"source": "https://marketplace.uipath.com/listings/extract-data-from-a-new-invoice-file-in-google-drive-and-store-it-in-google-sheets4473",
"config": [
{
"type": "googledrive",
"parameters": {
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"operation": ["delete", "upload"],
"args": [
{
"query": "title = 'invoice.pdf'",
"trash": false
},
{
"url": "https://drive.usercontent.google.com/download?id=1KAhoPFM0AU2dgn_NRt3y7CjOr9Er4vwD&export=download&authuser=0&confirm=t&uuid=e8528cd1-5106-45f3-a644-e1bbf5e08278&at=APZUnTUnTuXfV2Ted_9Wv2QomMvA:1706181110208",
"path": ["invoice.pdf"]
}
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
"type": "launch",
"parameters": {
"command": [
"socat",
"tcp-listen:9222,fork",
"tcp:localhost:1337"
]
}
},
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": [
"https://www.freerice.com/",
"https://www.hku.hk/",
"https://about.meta.com/technologies/facebook-app/"
]
}
},
{
"type": "login",
"parameters": {
"settings_file": "evaluation_examples/settings/google/settings.json",
"platform": "googledrive"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"libreoffice_calc",
"chrome"
],
"evaluator": {
"func": "compare_table",
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/invoice.xlsx",
"dest": "invoice.xlsx"
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1gkATnr8bk4JKQbzXZvzifoAQUA2sx5da&export=download&authuser=0&confirm=t&uuid=64ed0549-1627-49e8-8228-1e1925d6f6f7&at=APZUnTXkCm24SrOPuO5C6v4M3BiB:1706181091638",
"dest": "invoice_gold.xlsx"
},
"options": {
"rules": [
{
"type": "sheet_data",
"sheet_idx0": "RI0",
"sheet_idx1": "EI0"
}
]
}
}
}

View File

@@ -1,7 +1,7 @@
{ {
"id": "897e3b53-5d4d-444b-85cb-2cdc8a97d903", "id": "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
"snapshot": "chrome", "snapshot": "chrome",
"instruction": "I have a LibreOffice Writer file form.docx on the desktop. Help me convert it to PDF format and store it in the forms/ folder in Google Drive.", "instruction": "I have a LibreOffice Writer file form.docx on the desktop. Help me convert it to PDF format and store it in the forms/ folder in my Google Drive.",
"source": "https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive", "source": "https://marketplace.uipath.com/listings/convert-word-file-to-pdf-and-store-in-onedrive",
"config": [ "config": [
{ {
@@ -11,7 +11,7 @@
"operation": ["delete"], "operation": ["delete"],
"args": [ "args": [
{ {
"query": "title = 'form.pdf' or title = 'form.docx' or title = 'form.docx.pdf'", "query": "title = 'form.pdf' or title = 'form.docx' or title = 'form.docx.pdf' or title = 'forms'",
"trash": false "trash": false
} }
] ]
@@ -81,7 +81,10 @@
"result": { "result": {
"type": "googledrive_file", "type": "googledrive_file",
"settings_file": "evaluation_examples/settings/googledrive/settings.yml", "settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"query": "( title = 'form.pdf' or title = 'form.docx.pdf' ) and trashed = false", "query": [
"title = 'forms' and mimeType = 'application/vnd.google-apps.folder' and trashed = false",
"( title = 'form.pdf' or title = 'form.docx.pdf' ) and trashed = false"
],
"dest": "form.pdf" "dest": "form.pdf"
}, },
"expected": { "expected": {