update examples
This commit is contained in:
@@ -469,34 +469,51 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
|
||||
To retrieve the file, we provide four options in the config dict:
|
||||
1. query: a list of queries used to search for the file; each query is a string that follows the Google Drive search query format
|
||||
2. path: a list of path components leading to the file, e.g. 'folder/subfolder/filename' -> ['folder', 'subfolder', 'filename']
|
||||
3. query_list: a list of `query` values (one list of queries per file), used to download multiple files
|
||||
4. path_list: a list of `path` values (one list of path components per file), used to download multiple files
|
||||
dest: target file name or list. If *_list is used, dest should also be a list of the same length.
|
||||
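Return the local path of the downloaded file (a list of paths if *_list is used); a path is None if the file is not found or the download fails.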
|
||||
"""
|
||||
settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json')
|
||||
auth = GoogleAuth(settings_file=settings_file)
|
||||
drive = GoogleDrive(auth)
|
||||
_path = os.path.join(env.cache_dir, config['dest'])
|
||||
|
||||
def get_single_file(_query, _path):
    """Resolve a chain of Drive queries from the root and download the final match.

    Each query in ``_query`` is searched among the children of the entry
    matched by the previous query (the first one is searched under 'root').
    The entry matched by the last query is downloaded to ``_path``.

    Args:
        _query: ordered list of Google Drive search query strings, one per
            level of the folder hierarchy.
        _path: local destination path for the downloaded content.

    Returns:
        ``_path`` on success; ``None`` if any level has no match, if
        ``_query`` is empty, or if the search/download raises.
    """
    if not _query:
        # Nothing to resolve: previously this only failed by accident via an
        # unbound local; report it the same way, but explicitly.
        logger.info('[ERROR]: Failed to download the file from Google Drive')
        return None

    parent_id = 'root'
    try:
        for q in _query:
            # Restrict every lookup to direct children of the previous match.
            search = f'( {q} ) and "{parent_id}" in parents'
            filelist: GoogleDriveFileList = drive.ListFile({'q': search}).GetList()
            if not filelist:  # target file not found
                return None
            file: GoogleDriveFile = filelist[0]  # HACK: if multiple candidates, just use the first one
            parent_id = file['id']

        # After the loop, `file` is the entry matched by the last query.
        file.GetContentFile(_path, mimetype=file['mimeType'])
    except Exception:
        # Best-effort download: report and signal failure with None, but let
        # KeyboardInterrupt / SystemExit propagate instead of swallowing them.
        logger.info('[ERROR]: Failed to download the file from Google Drive')
        return None
    return _path
|
||||
|
||||
if 'query' in config:
|
||||
query = config['query']
|
||||
if type(query) != list: query = [query]
|
||||
else:
|
||||
paths = config['path']
|
||||
if type(paths) != list: paths = [paths]
|
||||
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(paths) - 1
|
||||
else f'title = {fp} and trashed = false' for idx, fp in enumerate(paths)]
|
||||
parent_id = 'root'
|
||||
|
||||
try:
|
||||
for q in query:
|
||||
search = f'( {q} ) and "{parent_id}" in parents'
|
||||
filelist: GoogleDriveFileList = drive.ListFile({'q': search}).GetList()
|
||||
if len(filelist) == 0: # target file not found
|
||||
return None
|
||||
file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just use the first one
|
||||
parent_id = file['id']
|
||||
|
||||
file.GetContentFile(_path, mimetype=file['mimeType'])
|
||||
except:
|
||||
logger.info('[ERROR]: Failed to download the file from Google Drive')
|
||||
return None
|
||||
return _path
|
||||
return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
|
||||
elif 'path' in config:
|
||||
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1
|
||||
else f'title = {fp} and trashed = false' for idx, fp in enumerate(config['path'])]
|
||||
return get_single_file(query, os.path.join(env.cache_dir, config['dest']))
|
||||
elif 'query_list' in config:
|
||||
_path_list = []
|
||||
assert len(config['query_list']) == len(config['dest'])
|
||||
for idx, query in enumerate(config['query_list']):
|
||||
dest = config['dest'][idx]
|
||||
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
|
||||
return _path_list
|
||||
else: # path_list in config
|
||||
_path_list = []
|
||||
assert len(config['path_list']) == len(config['dest'])
|
||||
for idx, path in enumerate(config['path_list']):
|
||||
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1
|
||||
else f'title = {fp} and trashed = false' for jdx, fp in enumerate(path)]
|
||||
dest = config['dest'][idx]
|
||||
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
|
||||
return _path_list
|
||||
Reference in New Issue
Block a user