update examples

This commit is contained in:
rhythmcao
2024-01-26 00:53:35 +08:00
parent 8de14f4cbc
commit 5ac80dc309
12 changed files with 818 additions and 27 deletions

View File

@@ -536,7 +536,7 @@ class SetupController:
page.goto(url)
logger.info(f"Opened new page: {url}")
settings = json.load(open(config['settings_file']))
email, password = settings['account'], settings['password']
email, password = settings['email'], settings['password']
try:
page.wait_for_selector('input[type="email"]', state="visible", timeout=3000)

View File

@@ -469,34 +469,51 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
To retrieve the file, we provide four options in the config dict:
1. query: a list of queries to search for the file; each query is a string that follows the Google Drive search query format
2. path: a list of path components leading to the file, 'folder/subfolder/filename' -> ['folder', 'subfolder', 'filename']
3. query_list: a list of `query` lists, used to download multiple files
4. path_list: a list of `path` lists, used to download multiple files
dest: target file name, or a list of target file names. If *_list is used, dest should also be a list of the same length.
Return the downloaded filepath locally.
"""
settings_file = config.get('settings_file', 'evaluation_examples/settings/googledrive/settings.json')
auth = GoogleAuth(settings_file=settings_file)
drive = GoogleDrive(auth)
_path = os.path.join(env.cache_dir, config['dest'])
def get_single_file(_query, _path):
    """Download a single Google Drive file to `_path`.

    `_query` is a sequence of Drive search expressions. They are resolved in
    order: the first is searched among the children of 'root', and each later
    one among the children of the item matched by the previous expression.
    The item matched by the last expression is the one that gets downloaded.
    A bare string is treated as a one-element sequence.

    Returns `_path` on success. Returns None when `_query` is empty, when any
    expression matches nothing, or when the lookup/download raises.
    """
    if isinstance(_query, str):
        # A lone query string must not be iterated character by character.
        _query = [_query]
    if not _query:
        # Nothing to resolve, so there is no file to download.
        return None

    parent_id = 'root'
    try:
        for q in _query:
            search = f'( {q} ) and "{parent_id}" in parents'
            filelist: GoogleDriveFileList = drive.ListFile({'q': search}).GetList()
            if len(filelist) == 0:  # target file not found
                return None
            file: GoogleDriveFile = filelist[0]  # HACK: if multiple candidates, just use the first one
            parent_id = file['id']
        # Only the item matched by the final expression is fetched.
        file.GetContentFile(_path, mimetype=file['mimeType'])
    except Exception:
        # Best-effort download: report and signal failure with None, but let
        # KeyboardInterrupt / SystemExit propagate.
        logger.info('[ERROR]: Failed to download the file from Google Drive')
        return None
    return _path
if 'query' in config:
query = config['query']
if type(query) != list: query = [query]
else:
paths = config['path']
if type(paths) != list: paths = [paths]
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(paths) - 1
else f'title = {fp} and trashed = false' for idx, fp in enumerate(paths)]
parent_id = 'root'
try:
for q in query:
search = f'( {q} ) and "{parent_id}" in parents'
filelist: GoogleDriveFileList = drive.ListFile({'q': search}).GetList()
if len(filelist) == 0: # target file not found
return None
file: GoogleDriveFile = filelist[0] # HACK: if multiple candidates, just use the first one
parent_id = file['id']
file.GetContentFile(_path, mimetype=file['mimeType'])
except:
logger.info('[ERROR]: Failed to download the file from Google Drive')
return None
return _path
return get_single_file(config['query'], os.path.join(env.cache_dir, config['dest']))
elif 'path' in config:
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if idx < len(config['path']) - 1
else f'title = {fp} and trashed = false' for idx, fp in enumerate(config['path'])]
return get_single_file(query, os.path.join(env.cache_dir, config['dest']))
elif 'query_list' in config:
_path_list = []
assert len(config['query_list']) == len(config['dest'])
for idx, query in enumerate(config['query_list']):
dest = config['dest'][idx]
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
return _path_list
else: # path_list in config
_path_list = []
assert len(config['path_list']) == len(config['dest'])
for idx, path in enumerate(config['path_list']):
query = [f"title = {fp} and mimeType = 'application/vnd.google-apps.folder' and trashed = false" if jdx < len(path) - 1
else f'title = {fp} and trashed = false' for jdx, fp in enumerate(path)]
dest = config['dest'][idx]
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
return _path_list