699 lines
29 KiB
Python
699 lines
29 KiB
Python
# todo: unify all the experiment python files into one file
|
|
import datetime
|
|
import json
|
|
import logging
|
|
import os
|
|
import sys
|
|
|
|
# import eventlet
|
|
import func_timeout
|
|
from func_timeout import FunctionTimedOut
|
|
|
|
from desktop_env.envs.desktop_env import DesktopEnv
|
|
from mm_agents.gpt_4v_agent import GPT4v_Agent
|
|
|
|
# eventlet.monkey_patch()
|
|
|
|
# from mm_agents.gemini_pro_agent import GeminiPro_Agent
|
|
|
|
# Logger Configs {{{ #
# The root logger receives everything; the handlers below decide what is
# kept and where it goes.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# One timestamp per process so that all log files of a run share a suffix.
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

# logging.FileHandler opens its file immediately and does not create missing
# parent directories, so make sure "logs" exists before building the handlers.
os.makedirs("logs", exist_ok=True)

file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")

file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)

# The format string embeds ANSI colour escapes; they are written to the log
# files as well because all four handlers share this formatter.
formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)

# Only records from the "desktopenv" logger hierarchy reach stdout and the
# "sdebug" file; the other two files receive records from every logger.
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))

logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #

# From here on ``logger`` is this script's own logger; it lives inside the
# "desktopenv" hierarchy so its records pass the filters above.
logger = logging.getLogger("desktopenv.experiment")

# todo: move the PATH_TO_VM to the argparser
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
|
|
|
|
|
|
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True,
                    max_time=600):
    """Run a single task example against the desktop VM and log its trajectory.

    A ``DesktopEnv`` is created for ``PATH_TO_VM`` and reset, then the agent is
    asked for actions until the environment reports ``done`` or ``max_steps``
    predictions have been made.  Every executed action produces a copy of the
    current screenshot and one JSON line in
    ``<example_trajectory_dir>/trajectory.json``; the value returned by
    ``env.evaluate()`` is appended as a final ``{"result": ...}`` line.

    Args:
        example: Task configuration, handed to ``DesktopEnv`` as ``task_config``.
        agent: Object exposing ``action_space`` and ``predict(observation)``;
            ``predict`` must return an iterable of actions.
        max_steps: Maximum number of ``agent.predict`` calls.
        example_trajectory_dir: Directory that receives the screenshots,
            ``trajectory.json`` and ``recording.mp4``.  It is not created
            here, so it has to exist already.
        recording: When true, screen recording is started before the first
            step and stopped (saved as ``recording.mp4``) after the last one.
        max_time: Currently unused; kept so that existing callers keep working.
    """
    trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(
        path_to_vm=PATH_TO_VM,
        action_space=agent.action_space,
        task_config=example,
        headless=True
    )
    # reset the environment to certain snapshot
    observation = env.reset()
    done = False
    step_num = 0

    if recording:
        # send a request to the server to start recording
        env.controller.start_recording()

    while not done and step_num < max_steps:
        actions = agent.predict(observation)
        step_num += 1
        for action in actions:
            # Capture the timestamp before executing the action
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_num, action)

            observation, reward, done, info = env.step(action)

            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            logger.info("Info: %s", info)

            # Save screenshot and trajectory information.  The source image is
            # read before the destination is opened so that a failed read does
            # not leave an empty PNG behind.
            # NOTE(review): several actions of one step executed within the
            # same second share this file name and overwrite each other.
            screenshot_file = f"step_{step_num}_{action_timestamp}.png"
            with open(observation['screenshot'], "rb") as source:
                screenshot = source.read()
            with open(os.path.join(example_trajectory_dir, screenshot_file), "wb") as target:
                target.write(screenshot)

            with open(trajectory_recording_path, "a") as f:
                f.write(json.dumps({
                    "step_num": step_num,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": screenshot_file
                }))
                f.write("\n")

            if done:
                # Skip the remaining actions of this prediction; the outer
                # loop ends because ``done`` is now true.
                logger.info("The episode is done.")
                break

    # Only ask the server to stop a recording that was actually started.
    if recording:
        def stop_recording():
            try:
                env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
            except Exception as e:
                # Best effort: a missing video must not prevent the evaluation.
                logger.error("An error occurred while stopping the recording: %s", e)

        try:
            func_timeout.func_timeout(120, stop_recording)
            # todo: make sure we got the video file, check the bug
        except FunctionTimedOut:
            logger.info("Recording timed out.")

    result = env.evaluate()
    logger.info("Result: %.2f", result)

    # fixme: change to write the result into a separate file
    with open(trajectory_recording_path, "a") as f:
        f.write(json.dumps({
            "result": result
        }))
        f.write("\n")

    # todo: append the result to the wandb for visualization

    # NOTE(review): the close call is disabled, so the environment is in fact
    # left open although the message below says otherwise.
    # env.close()
    logger.info("Environment closed.")
|
|
|
|
|
|
def main(example_class, example_id, gpt4_model="gpt-4-vision-preview"):
    """Run one evaluation example with the GPT-4V agent unless it is already done.

    The task is loaded from
    ``evaluation_examples/examples/<example_class>/<example_id>.json`` and its
    ``snapshot`` entry is overwritten with ``"exp_v5"``.  Results go to
    ``exp_trajectory/screenshot/<example_class>/<gpt4_model>/<example_id>``.
    If the last non-empty line of an existing ``trajectory.json`` in that
    directory contains a ``"result"`` key, the example is skipped.  Otherwise
    ``run_one_example`` is executed with at most 15 steps under a 1200 second
    timeout; any failure, including the timeout, is logged and appended to
    ``trajectory.json`` as an ``{"error": ...}`` line instead of being raised.

    Args:
        example_class: Sub-directory (application name) of the example.
        example_id: File name of the example without the ``.json`` suffix.
        gpt4_model: Model name passed to ``GPT4v_Agent``; it is also used as a
            path component of the trajectory directory.
    """
    # todo: merge the main function into the run_one_example function
    # fixme: change all the settings like action_space, model, etc. to the argparser
    action_space = "pyautogui"
    # Only referenced by the commented-out Gemini variant below.
    gemini_model = "gemini-pro-vision"

    example_path = f"evaluation_examples/examples/{example_class}/{example_id}.json"

    logger.info("Running example %s/%s", example_class, example_id)
    logger.info("Using model %s", gpt4_model)
    # logger.info("Using model %s", gemini_model)

    with open(example_path, "r", encoding="utf-8") as f:
        example = json.load(f)
    example["snapshot"] = "exp_v5"

    api_key = os.environ.get("OPENAI_API_KEY")
    agent = GPT4v_Agent(api_key=api_key,
                        model=gpt4_model,
                        instruction=example['instruction'],
                        action_space=action_space,
                        exp="screenshot")
    #
    # api_key = os.environ.get("GENAI_API_KEY")
    # agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")

    root_trajectory_dir = "exp_trajectory"

    example_trajectory_dir = os.path.join(root_trajectory_dir, "screenshot", example_class, gpt4_model, example_id)
    # example_trajectory_dir = os.path.join(root_trajectory_dir, "screenshot", example_class, gemini_model, example_id)

    os.makedirs(example_trajectory_dir, exist_ok=True)

    trajectory_path = os.path.join(example_trajectory_dir, "trajectory.json")
    if os.path.exists(trajectory_path):
        with open(trajectory_path, "r") as f:
            # ignore blank lines, e.g. the one after the trailing newline
            lines = [line.strip() for line in f if line.strip() != ""]
        if lines:
            try:
                last_line = json.loads(lines[-1])
            except json.JSONDecodeError:
                # A truncated or corrupted record means the previous run did
                # not finish cleanly, so the example is simply run again.
                logger.warning("Cannot parse the last line of %s; running the example again.", trajectory_path)
                last_line = {}
            if "result" in last_line:
                logger.info("%s has been evaluated. Skip.", example_path)
                return

    try:
        func_timeout.func_timeout(1200, run_one_example, args=(example, agent, 15, example_trajectory_dir))
    except (Exception, FunctionTimedOut) as e:
        # FunctionTimedOut is listed explicitly: as far as I recall it derives
        # from BaseException, so ``except Exception`` alone would let a
        # timeout abort the whole batch.
        logger.error("An error occurred: %s", e)
        with open(trajectory_path, "a") as f:
            f.write(json.dumps({
                "error": str(e)
            }))
            # Terminate the record so later appends start on their own line.
            f.write("\n")
|
|
|
|
|
|
if __name__ == '__main__':
|
|
####### The complete version of the list of examples #######
|
|
|
|
# todo: add recorder of the progress of the examples
|
|
|
|
# todo: remove the useless example files
|
|
|
|
os_list = [
|
|
'94d95f96-9699-4208-98ba-3c3119edf9c2',
|
|
'bedcedc4-4d72-425e-ad62-21960b11fe0d',
|
|
'43c2d64c-bab5-4dcb-a30c-b888321c319a',
|
|
'7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82',
|
|
'ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3',
|
|
'a462a795-fdc7-4b23-b689-e8b6df786b78',
|
|
'f9be0997-4b7c-45c5-b05c-4612b44a6118',
|
|
'28cc3b7e-b194-4bc9-8353-d04c0f4d56d2',
|
|
'5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57',
|
|
'e0df059f-28a6-4169-924f-b9623e7184cc',
|
|
'ddc75b62-7311-4af8-bfb3-859558542b36',
|
|
'b6781586-6346-41cd-935a-a6b1487918fc',
|
|
'b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa',
|
|
'3ce045a0-877b-42aa-8d2c-b4a863336ab8',
|
|
'fe41f596-a71b-4c2f-9b2f-9dcd40b568c3',
|
|
'a4d98375-215b-4a4d-aee9-3d4370fccc41',
|
|
'13584542-872b-42d8-b299-866967b5c3ef',
|
|
'23393935-50c7-4a86-aeea-2b78fd089c5c',
|
|
'5812b315-e7bd-4265-b51f-863c02174c28',
|
|
'c288e301-e626-4b98-a1ab-159dcb162af5',
|
|
'cc9d4f34-1ca0-4a1b-8ff2-09302696acb9',
|
|
'c56de254-a3ec-414e-81a6-83d2ce8c41fa',
|
|
'4783cc41-c03c-4e1b-89b4-50658f642bd5',
|
|
'5c1075ca-bb34-46a3-a7a0-029bd7463e79',
|
|
'5ced85fc-fa1a-4217-95fd-0fb530545ce2',
|
|
'37887e8c-da15-4192-923c-08fa390a176d',
|
|
'4127319a-8b79-4410-b58a-7a151e15f3d7',
|
|
'4d117223-a354-47fb-8b45-62ab1390a95f',
|
|
'6f56bf42-85b8-4fbb-8e06-6c44960184ba'
|
|
]
|
|
|
|
calc_list = [
|
|
'eb03d19a-b88d-4de4-8a64-ca0ac66f426b',
|
|
'0bf05a7d-b28b-44d2-955a-50b41e24012a',
|
|
'7b802dad-6e0f-4204-9815-d4e3f57627d8',
|
|
'7a4e4bc8-922c-4c84-865c-25ba34136be1',
|
|
'2bd59342-0664-4ccb-ba87-79379096cc08',
|
|
'a9f325aa-8c05-4e4f-8341-9e4358565f4f',
|
|
'ecb0df7a-4e8d-4a03-b162-053391d3afaf',
|
|
'7efeb4b1-3d19-4762-b163-63328d66303b',
|
|
'4e6fcf72-daf3-439f-a232-c434ce416af6',
|
|
'6054afcb-5bab-4702-90a0-b259b5d3217c',
|
|
'abed40dc-063f-4598-8ba5-9fe749c0615d',
|
|
'01b269ae-2111-4a07-81fd-3fcd711993b0',
|
|
'8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14',
|
|
'0cecd4f3-74de-457b-ba94-29ad6b5dafb6',
|
|
'4188d3a4-077d-46b7-9c86-23e1a036f6c1',
|
|
'51b11269-2ca8-4b2a-9163-f21758420e78',
|
|
'7e429b8d-a3f0-4ed0-9b58-08957d00b127',
|
|
'347ef137-7eeb-4c80-a3bb-0951f26a8aff',
|
|
'6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5',
|
|
'3aaa4e37-dc91-482e-99af-132a612d40f3',
|
|
'37608790-6147-45d0-9f20-1137bb35703d',
|
|
'f9584479-3d0d-4c79-affa-9ad7afdd8850',
|
|
'd681960f-7bc3-4286-9913-a8812ba3261a',
|
|
'21df9241-f8d7-4509-b7f1-37e501a823f7',
|
|
'1334ca3e-f9e3-4db8-9ca7-b4c653be7d17',
|
|
'357ef137-7eeb-4c80-a3bb-0951f26a8aff',
|
|
'aa3a8974-2e85-438b-b29e-a64df44deb4b',
|
|
'a01fbce3-2793-461f-ab86-43680ccbae25',
|
|
'4f07fbe9-70de-4927-a4d5-bb28bc12c52c',
|
|
'1e8df695-bd1b-45b3-b557-e7d599cf7597',
|
|
'1273e544-688f-496b-8d89-3e0f40aa0606',
|
|
'4172ea6e-6b77-4edb-a9cc-c0014bd1603b',
|
|
'0326d92d-d218-48a8-9ca1-981cd6d064c7',
|
|
'26a8440e-c166-4c50-aef4-bfb77314b46b',
|
|
'1954cced-e748-45c4-9c26-9855b97fbc5e',
|
|
'535364ea-05bd-46ea-9937-9f55c68507e8',
|
|
'4de54231-e4b5-49e3-b2ba-61a0bec721c0',
|
|
'1de60575-bb6e-4c3d-9e6a-2fa699f9f197',
|
|
'0a2e43bf-b26c-4631-a966-af9dfa12c9e5',
|
|
'3a7c8185-25c1-4941-bd7b-96e823c9f21f',
|
|
'04d9aeaf-7bed-4024-bedb-e10e6f00eb7f',
|
|
'42e0a640-4f19-4b28-973d-729602b5a4a7',
|
|
'1d17d234-e39d-4ed7-b46f-4417922a4e7c',
|
|
'21ab7b40-77c2-4ae6-8321-e00d3a086c73',
|
|
'30e3e107-1cfb-46ee-a755-2cd080d7ba6a',
|
|
'12382c62-0cd1-4bf2-bdc8-1d20bf9b2371',
|
|
'035f41ba-6653-43ab-aa63-c86d449d62e5',
|
|
'51719eea-10bc-4246-a428-ac7c433dd4b3'
|
|
]
|
|
|
|
sheetcopilot_list = [
|
|
# "1e8df695-bd1b-45b3-b557-e7d599cf7597",
|
|
# "1273e544-688f-496b-8d89-3e0f40aa0606",
|
|
# "4172ea6e-6b77-4edb-a9cc-c0014bd1603b",
|
|
# "0326d92d-d218-48a8-9ca1-981cd6d064c7",
|
|
# "26a8440e-c166-4c50-aef4-bfb77314b46b",
|
|
# "1954cced-e748-45c4-9c26-9855b97fbc5e",
|
|
# "535364ea-05bd-46ea-9937-9f55c68507e8",
|
|
# "4de54231-e4b5-49e3-b2ba-61a0bec721c0",
|
|
# "1de60575-bb6e-4c3d-9e6a-2fa699f9f197",
|
|
# "0a2e43bf-b26c-4631-a966-af9dfa12c9e5",
|
|
# "3a7c8185-25c1-4941-bd7b-96e823c9f21f",
|
|
# "04d9aeaf-7bed-4024-bedb-e10e6f00eb7f",
|
|
# "42e0a640-4f19-4b28-973d-729602b5a4a7",
|
|
# "1d17d234-e39d-4ed7-b46f-4417922a4e7c",
|
|
"21ab7b40-77c2-4ae6-8321-e00d3a086c73",
|
|
"30e3e107-1cfb-46ee-a755-2cd080d7ba6a",
|
|
"12382c62-0cd1-4bf2-bdc8-1d20bf9b2371",
|
|
"51719eea-10bc-4246-a428-ac7c433dd4b3"
|
|
]
|
|
|
|
impress_list = [
|
|
'5d901039-a89c-4bfb-967b-bf66f4df075e',
|
|
'550ce7e7-747b-495f-b122-acdc4d0b8e54',
|
|
'455d3c66-7dc6-4537-a39a-36d3e9119df7',
|
|
'af23762e-2bfd-4a1d-aada-20fa8de9ce07',
|
|
'c59742c0-4323-4b9d-8a02-723c251deaa0',
|
|
'ef9d12bd-bcee-4ba0-a40e-918400f43ddf',
|
|
'9ec204e4-f0a3-42f8-8458-b772a6797cab',
|
|
'0f84bef9-9790-432e-92b7-eece357603fb',
|
|
'ce88f674-ab7a-43da-9201-468d38539e4a',
|
|
'3b27600c-3668-4abd-8f84-7bcdebbccbdb',
|
|
'a097acff-6266-4291-9fbd-137af7ecd439',
|
|
'bf4e9888-f10f-47af-8dba-76413038b73c',
|
|
'21760ecb-8f62-40d2-8d85-0cee5725cb72',
|
|
'ac9bb6cb-1888-43ab-81e4-a98a547918cd',
|
|
'2cd43775-7085-45d8-89fa-9e35c0a915cf',
|
|
'358aa0a7-6677-453f-ae35-e440f004c31e',
|
|
'a669ef01-ded5-4099-9ea9-25e99b569840',
|
|
'73c99fb9-f828-43ce-b87a-01dc07faa224',
|
|
'15aece23-a215-4579-91b4-69eec72e18da',
|
|
'986fc832-6af2-417c-8845-9272b3a1528b',
|
|
'a434992a-89df-4577-925c-0c58b747f0f4',
|
|
'7dbc52a6-11e0-4c9a-a2cb-1e36cfda80d8',
|
|
'841b50aa-df53-47bd-a73a-22d3a9f73160',
|
|
'8979838c-54a5-4454-a2b8-3d135a1a5c8f',
|
|
'b8adbc24-cef2-4b15-99d5-ecbe7ff445eb',
|
|
'2b94c692-6abb-48ae-ab0b-b3e8a19cb340',
|
|
'9cf05d24-6bd9-4dae-8967-f67d88f5d38a',
|
|
'08aced46-45a2-48d7-993b-ed3fb5b32302',
|
|
'edb61b14-a854-4bf5-a075-c8075c11293a',
|
|
'c82632a4-56b6-4db4-9dd1-3820ee3388e4',
|
|
'39be0d19-634d-4475-8768-09c130f5425d',
|
|
'ac1b39ff-ee4d-4483-abce-c117e98942f0',
|
|
'f23acfd2-c485-4b7c-a1e7-d4303ddfe864',
|
|
'70bca0cc-c117-427e-b0be-4df7299ebeb6',
|
|
'af2d657a-e6b3-4c6a-9f67-9e3ed015974c',
|
|
'57667013-ea97-417c-9dce-2713091e6e2a',
|
|
'0a211154-fda0-48d0-9274-eaac4ce5486d',
|
|
'a53f80cd-4a90-4490-8310-097b011433f6',
|
|
'7ae48c60-f143-4119-b659-15b8f485eb9a',
|
|
'5cfb9197-e72b-454b-900e-c06b0c802b40',
|
|
'05dd4c1d-c489-4c85-8389-a7836c4f0567',
|
|
'5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1',
|
|
'4ed5abd0-8b5d-47bd-839f-cacfa15ca37a',
|
|
'e4ef0baf-4b52-4590-a47e-d4d464cca2d7',
|
|
'ed43c15f-00cb-4054-9c95-62c880865d68',
|
|
'3161d64e-3120-47b4-aaad-6a764a92493b',
|
|
'04578141-1d42-4146-b9cf-6fab4ce5fd74'
|
|
]
|
|
|
|
impress_gpt4_list = [
|
|
# "5d901039-a89c-4bfb-967b-bf66f4df075e",
|
|
# "550ce7e7-747b-495f-b122-acdc4d0b8e54",
|
|
# "455d3c66-7dc6-4537-a39a-36d3e9119df7",
|
|
# "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
|
|
# "c59742c0-4323-4b9d-8a02-723c251deaa0",
|
|
# "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
|
|
# "9ec204e4-f0a3-42f8-8458-b772a6797cab",
|
|
# "0f84bef9-9790-432e-92b7-eece357603fb",
|
|
# "ce88f674-ab7a-43da-9201-468d38539e4a",
|
|
# "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
|
|
# "a097acff-6266-4291-9fbd-137af7ecd439",
|
|
# "bf4e9888-f10f-47af-8dba-76413038b73c",
|
|
# "21760ecb-8f62-40d2-8d85-0cee5725cb72",
|
|
"ac9bb6cb-1888-43ab-81e4-a98a547918cd",
|
|
"2cd43775-7085-45d8-89fa-9e35c0a915cf",
|
|
"358aa0a7-6677-453f-ae35-e440f004c31e",
|
|
"a669ef01-ded5-4099-9ea9-25e99b569840",
|
|
# The following examples are from PPTC
|
|
"73c99fb9-f828-43ce-b87a-01dc07faa224",
|
|
"15aece23-a215-4579-91b4-69eec72e18da",
|
|
"986fc832-6af2-417c-8845-9272b3a1528b",
|
|
"a434992a-89df-4577-925c-0c58b747f0f4",
|
|
"7dbc52a6-11e0-4c9a-a2cb-1e36cfda80d8",
|
|
"841b50aa-df53-47bd-a73a-22d3a9f73160",
|
|
"8979838c-54a5-4454-a2b8-3d135a1a5c8f",
|
|
"b8adbc24-cef2-4b15-99d5-ecbe7ff445eb",
|
|
"2b94c692-6abb-48ae-ab0b-b3e8a19cb340",
|
|
"9cf05d24-6bd9-4dae-8967-f67d88f5d38a",
|
|
"08aced46-45a2-48d7-993b-ed3fb5b32302",
|
|
"edb61b14-a854-4bf5-a075-c8075c11293a",
|
|
"c82632a4-56b6-4db4-9dd1-3820ee3388e4",
|
|
"39be0d19-634d-4475-8768-09c130f5425d",
|
|
"ac1b39ff-ee4d-4483-abce-c117e98942f0",
|
|
"f23acfd2-c485-4b7c-a1e7-d4303ddfe864",
|
|
"70bca0cc-c117-427e-b0be-4df7299ebeb6",
|
|
"af2d657a-e6b3-4c6a-9f67-9e3ed015974c",
|
|
"57667013-ea97-417c-9dce-2713091e6e2a",
|
|
"0a211154-fda0-48d0-9274-eaac4ce5486d",
|
|
"a53f80cd-4a90-4490-8310-097b011433f6",
|
|
"7ae48c60-f143-4119-b659-15b8f485eb9a",
|
|
"5cfb9197-e72b-454b-900e-c06b0c802b40",
|
|
"05dd4c1d-c489-4c85-8389-a7836c4f0567",
|
|
"5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1",
|
|
"4ed5abd0-8b5d-47bd-839f-cacfa15ca37a",
|
|
"e4ef0baf-4b52-4590-a47e-d4d464cca2d7",
|
|
"ed43c15f-00cb-4054-9c95-62c880865d68",
|
|
"3161d64e-3120-47b4-aaad-6a764a92493b",
|
|
"04578141-1d42-4146-b9cf-6fab4ce5fd74"
|
|
]
|
|
|
|
writer_list = [
|
|
'0810415c-bde4-4443-9047-d5f70165a697',
|
|
'0a0faba3-5580-44df-965d-f562a99b291c',
|
|
'0b17a146-2934-46c7-8727-73ff6b6483e8',
|
|
'0e47de2a-32e0-456c-a366-8c607ef7a9d2',
|
|
'0e763496-b6bb-4508-a427-fad0b6c3e195',
|
|
'3ef2b351-8a84-4ff2-8724-d86eae9b842e',
|
|
'4bcb1253-a636-4df4-8cb0-a35c04dfef31',
|
|
'66399b0d-8fda-4618-95c4-bfc6191617e9',
|
|
'6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2',
|
|
'6ada715d-3aae-4a32-a6a7-429b2e43fb93',
|
|
'6f81754e-285d-4ce0-b59e-af7edb02d108',
|
|
'72b810ef-4156-4d09-8f08-a0cf57e7cefe',
|
|
'8472fece-c7dd-4241-8d65-9b3cd1a0b568',
|
|
'88fe4b2d-3040-4c70-9a70-546a47764b48',
|
|
'936321ce-5236-426a-9a20-e0e3c5dc536f',
|
|
'adf5e2c3-64c7-4644-b7b6-d2f0167927e7',
|
|
'b21acd93-60fd-4127-8a43-2f5178f4a830',
|
|
'd53ff5ee-3b1a-431e-b2be-30ed2673079b',
|
|
'e246f6d8-78d7-44ac-b668-fcf47946cb50',
|
|
'e528b65e-1107-4b8c-8988-490e4fece599',
|
|
'ecc2413d-8a48-416e-a3a2-d30106ca36cb',
|
|
'f178a4a9-d090-4b56-bc4c-4b72a61a035d',
|
|
'bb8ccc78-479f-4a2f-a71e-d565e439436b'
|
|
]
|
|
|
|
vlc_list = [
|
|
'59f21cfb-0120-4326-b255-a5b827b38967',
|
|
'8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89',
|
|
'8f080098-ddb1-424c-b438-4e96e5e4786e',
|
|
'bba3381f-b5eb-4439-bd9e-80c22218d5a7',
|
|
'fba2c100-79e8-42df-ae74-b592418d54f4',
|
|
'efcf0d81-0835-4880-b2fd-d866e8bc2294',
|
|
'8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f',
|
|
'aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6',
|
|
'386dbd0e-0241-4a0a-b6a2-6704fba26b1c',
|
|
'9195653c-f4aa-453d-aa95-787f6ccfaae9',
|
|
'd06f0d4d-2cd5-4ede-8de9-598629438c6e',
|
|
'a5bbbcd5-b398-4c91-83d4-55e1e31bbb81',
|
|
'5ac2891a-eacd-4954-b339-98abba077adb',
|
|
'f3977615-2b45-4ac5-8bba-80c17dbe2a37',
|
|
'215dfd39-f493-4bc3-a027-8a97d72c61bf',
|
|
'cb130f0d-d36f-4302-9838-b3baf46139b6',
|
|
'7882ed6e-bece-4bf0-bada-c32dc1ddae72'
|
|
]
|
|
|
|
chrome_list = [
|
|
'bb5e4c0d-f964-439c-97b6-bdb9747de3f4',
|
|
'7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3',
|
|
'06fe7178-4491-4589-810f-2e2bc9502122',
|
|
'e1e75309-3ddb-4d09-92ec-de869c928143',
|
|
'35253b65-1c19-4304-8aa4-6884b8218fc0',
|
|
'2ad9387a-65d8-4e33-ad5b-7580065a27ca',
|
|
'7a5a7856-f1b6-42a4-ade9-1ca81ca0f263',
|
|
'44ee5668-ecd5-4366-a6ce-c1c9b8d4e938',
|
|
'2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3',
|
|
'480bcfea-d68f-4aaa-a0a9-2589ef319381',
|
|
'af630914-714e-4a24-a7bb-f9af687d3b91',
|
|
'3720f614-37fd-4d04-8a6b-76f54f8c222d',
|
|
'99146c54-4f37-4ab8-9327-5f3291665e1e',
|
|
'12086550-11c0-466b-b367-1d9e75b3910e',
|
|
'6766f2b8-8a72-417f-a9e5-56fcaa735837',
|
|
'93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9',
|
|
'ae78f875-5b98-4907-bbb5-9c737fc68c03',
|
|
'3299584d-8f11-4457-bf4c-ce98f7600250',
|
|
'030eeff7-b492-4218-b312-701ec99ee0cc',
|
|
'9656a811-9b5b-4ddf-99c7-5117bcef0626',
|
|
'fc6d8143-9452-4171-9459-7f515143419a',
|
|
'a96b564e-dbe9-42c3-9ccf-b4498073438a',
|
|
'1704f00f-79e6-43a7-961b-cedd3724d5fd',
|
|
'f3b19d1e-2d48-44e9-b4e1-defcae1a0197',
|
|
'82bc8d6a-36eb-4d2d-8801-ef714fb1e55a',
|
|
'47543840-672a-467d-80df-8f7c3b9788c9',
|
|
'c1fa57f3-c3db-4596-8f09-020701085416',
|
|
'da46d875-6b82-4681-9284-653b0c7ae241',
|
|
'6c4c23a1-42a4-43cc-9db1-2f86ff3738cc',
|
|
'f79439ad-3ee8-4f99-a518-0eb60e5652b0',
|
|
'b7895e80-f4d1-4648-bee0-4eb45a6f1fa8',
|
|
'9f3f70fc-5afc-4958-a7b7-3bb4fcb01805',
|
|
'7f52cab9-535c-4835-ac8c-391ee64dc930',
|
|
'82279c77-8fc6-46f6-9622-3ba96f61b477',
|
|
'2888b4e6-5b47-4b57-8bf5-c73827890774',
|
|
'b4f95342-463e-4179-8c3f-193cd7241fb2',
|
|
'f5d96daf-83a8-4c86-9686-bada31fc66ab',
|
|
'121ba48f-9e17-48ce-9bc6-a4fb17a7ebba',
|
|
'368d9ba4-203c-40c1-9fa3-da2f1430ce63',
|
|
'59155008-fe71-45ec-8a8f-dc35497b6aa8',
|
|
'a728a36e-8bf1-4bb6-9a03-ef039a5233f0',
|
|
'b070486d-e161-459b-aa2b-ef442d973b92',
|
|
'0d8b7de3-e8de-4d86-b9fd-dd2dce58a217',
|
|
'9f935cce-0a9f-435f-8007-817732bfc0a5',
|
|
'f0b971a1-6831-4b9b-a50e-22a6e47f45ba',
|
|
'cabb3bae-cccb-41bd-9f5d-0f3a9fecd825'
|
|
]
|
|
|
|
vs_code_list = [
|
|
'0ed39f63-6049-43d4-ba4d-5fa2fe04a951',
|
|
'53ad5833-3455-407b-bbc6-45b4c79ab8fb',
|
|
'eabc805a-bfcf-4460-b250-ac92135819f6',
|
|
'982d12a5-beab-424f-8d38-d2a48429e511',
|
|
'4e60007a-f5be-4bfc-9723-c39affa0a6d3',
|
|
'e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2',
|
|
'9439a27b-18ae-42d8-9778-5f68f891805e',
|
|
'ae506c68-352c-4094-9caa-ee9d42052317',
|
|
'ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae',
|
|
'c714dcee-cad3-4e12-8f3c-12bdcfcdb048',
|
|
'930fdb3b-11a8-46fe-9bac-577332e2640e',
|
|
'276cc624-87ea-4f08-ab93-f770e3790175',
|
|
'9d425400-e9b2-4424-9a4b-d4c7abac4140',
|
|
'5e2d93d8-8ad0-4435-b150-1692aacaa994',
|
|
'6ed0a554-cbee-4b44-84ea-fd6c042f4fe1',
|
|
'ec71221e-ac43-46f9-89b8-ee7d80f7e1c5',
|
|
'70745df8-f2f5-42bd-8074-fbc10334fcc5',
|
|
'57242fad-77ca-454f-b71b-f187181a9f23',
|
|
'c6bf789c-ba3a-4209-971d-b63abf0ab733',
|
|
'0512bb38-d531-4acf-9e7e-0add90816068',
|
|
'847a96b6-df94-4927-97e6-8cc9ea66ced7',
|
|
'7aeae0e2-70ee-4705-821d-1bba5d5b2ddd',
|
|
'dcbe20e8-647f-4f1d-8696-f1c5bbb570e3',
|
|
'7c4cc09e-7a92-40dd-8338-b2286535c4ed',
|
|
'971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6'
|
|
]
|
|
|
|
gimp_list = [
|
|
'7a4deb26-d57d-4ea9-9a73-630f66a7b568',
|
|
'554785e9-4523-4e7a-b8e1-8016f565f56a',
|
|
'77b8ab4d-994f-43ac-8930-8ca087d7c4b4',
|
|
'f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce',
|
|
'd52d6308-ec58-42b7-a2c9-de80e4837b2b',
|
|
'2a729ded-3296-423d-aec4-7dd55ed5fbb3',
|
|
'b148e375-fe0b-4bec-90e7-38632b0d73c2',
|
|
'a746add2-cab0-4740-ac36-c3769d9bfb46',
|
|
'7b7617bd-57cc-468e-9c91-40c4ec2bcb3d',
|
|
'd16c99dc-2a1e-46f2-b350-d97c86c85c15',
|
|
'06ca5602-62ca-47f6-ad4f-da151cde54cc',
|
|
'e2dd0213-26db-4349-abe5-d5667bfd725c',
|
|
'f723c744-e62c-4ae6-98d1-750d3cd7d79d',
|
|
'72f83cdc-bf76-4531-9a1b-eb893a13f8aa',
|
|
'7767eef2-56a3-4cea-8c9f-48c070c7d65b',
|
|
'734d6579-c07d-47a8-9ae2-13339795476b',
|
|
'e19bd559-633b-4b02-940f-d946248f088e',
|
|
'38f48d40-764e-4e77-a7cf-51dfce880291',
|
|
'fbb548ca-c2a6-4601-9204-e39a2efc507b',
|
|
'5ca86c6f-f317-49d8-b6a7-b527541caae8',
|
|
'62f7fd55-0687-4a43-b6e1-3eda16fc6252',
|
|
'8ea73f6f-9689-42ad-8c60-195bbf06a7ba',
|
|
'58d3eeeb-e9d0-499f-962e-fd0db2a744d8',
|
|
'2e6f678f-472d-4c55-99cc-8e7c5c402a71',
|
|
'045bf3ff-9077-4b86-b483-a1040a949cff',
|
|
'dbbf4b99-2253-4b10-9274-45f246af2466'
|
|
]
|
|
|
|
thunderbird_list = [
|
|
'bb5e4c0d-f964-439c-97b6-bdb9747de3f4',
|
|
'7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3',
|
|
'12086550-11c0-466b-b367-1d9e75b3910e',
|
|
'06fe7178-4491-4589-810f-2e2bc9502122',
|
|
'6766f2b8-8a72-417f-a9e5-56fcaa735837',
|
|
'e1e75309-3ddb-4d09-92ec-de869c928143',
|
|
'3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5',
|
|
'35253b65-1c19-4304-8aa4-6884b8218fc0',
|
|
'd088f539-cab4-4f9a-ac92-9999fc3a656e',
|
|
'2ad9387a-65d8-4e33-ad5b-7580065a27ca',
|
|
'480bcfea-d68f-4aaa-a0a9-2589ef319381',
|
|
'030eeff7-b492-4218-b312-701ec99ee0cc',
|
|
'94760984-3ff5-41ee-8347-cf1af709fea0',
|
|
'99146c54-4f37-4ab8-9327-5f3291665e1e',
|
|
'c9e7eaf2-b1a1-4efc-a982-721972fa9f02']
|
|
|
|
multiple_list = [
|
|
'2b9493d7-49b8-493a-a71b-56cd1f4d6908',
|
|
'2c9fc0de-3ee7-45e1-a5df-c86206ad78b5',
|
|
'2fe4b718-3bd7-46ec-bdce-b184f5653624',
|
|
'3680a5ee-6870-426a-a997-eba929a0d25c',
|
|
'46407397-a7d5-4c6b-92c6-dbe038b1457b',
|
|
'4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc',
|
|
'510f64c8-9bcc-4be1-8d30-638705850618',
|
|
'51f5801c-18b3-4f25-b0c3-02f85507a078',
|
|
'58565672-7bfe-48ab-b828-db349231de6b',
|
|
'78aed49a-a710-4321-a793-b611a7c5b56b',
|
|
'897e3b53-5d4d-444b-85cb-2cdc8a97d903',
|
|
'937087b6-f668-4ba6-9110-60682ee33441',
|
|
'a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb',
|
|
'b52b40a5-ad70-4c53-b5b0-5650a8387052',
|
|
'c867c42d-a52d-4a24-8ae3-f75d256b5618',
|
|
'd9b7c649-c975-4f53-88f5-940b29c47247',
|
|
'e135df7c-7687-4ac0-a5f0-76b74438b53e',
|
|
'ee9a3c83-f437-4879-8918-be5efbb9fac7',
|
|
'f7dfbef3-7697-431c-883a-db8583a4e4f9',
|
|
'f8cfa149-d1c1-4215-8dac-4a0932bad3c2',
|
|
'6d72aad6-187a-4392-a4c4-ed87269c51cf',
|
|
'f918266a-b3e0-4914-865d-4faa564f1aef',
|
|
'da52d699-e8d2-4dc5-9191-a2199e0b6a9b',
|
|
'bc2b57f3-686d-4ec9-87ce-edf850b7e442',
|
|
'74d5859f-ed66-4d3e-aa0e-93d7a592ce41',
|
|
'b5062e3e-641c-4e3a-907b-ac864d2e7652',
|
|
'00fa164e-2612-4439-992e-157d019a8436',
|
|
'acb0f96b-e27c-44d8-b55f-7cb76609dfcd',
|
|
'69acbb55-d945-4927-a87b-8480e1a5bb7e',
|
|
'48d05431-6cd5-4e76-82eb-12b60d823f7d',
|
|
'68a25bd4-59c7-4f4d-975e-da0c8509c848',
|
|
'eb303e01-261e-4972-8c07-c9b4e7a4922a',
|
|
'0c825995-5b70-4526-b663-113f4c999dd2',
|
|
'c7c1e4c3-9e92-4eba-a4b8-689953975ea4',
|
|
'd1acdb87-bb67-4f30-84aa-990e56a09c92',
|
|
'deec51c9-3b1e-4b9e-993c-4776f20e8bb2',
|
|
'8e116af7-7db7-4e35-a68b-b0939c066c78',
|
|
'337d318b-aa07-4f4f-b763-89d9a2dd013f',
|
|
'82e3c869-49f6-4305-a7ce-f3e64a0618e7',
|
|
'185f29bd-5da0-40a6-b69c-ba7f4e0324ef',
|
|
'869de13e-bef9-4b91-ba51-f6708c40b096',
|
|
'2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e',
|
|
'3a93cae4-ad3e-403e-8c12-65303b271818',
|
|
'1f18aa87-af6f-41ef-9853-cdb8f32ebdea',
|
|
'26150609-0da3-4a7d-8868-0faf9c5f01bb',
|
|
'9219480b-3aed-47fc-8bac-d2cffc5849f7',
|
|
'881deb30-9549-4583-a841-8270c65f2a17',
|
|
'7e287123-70ca-47b9-8521-47db09b69b14',
|
|
'e2392362-125e-4f76-a2ee-524b183a3412',
|
|
'5bc63fb9-276a-4439-a7c1-9dc76401737f',
|
|
'26660ad1-6ebb-4f59-8cba-a8432dfe8d38',
|
|
'a82b78bb-7fde-4cb3-94a4-035baf10bcf0',
|
|
'36037439-2044-4b50-b9d1-875b5a332143',
|
|
'716a6079-22da-47f1-ba73-c9d58f986a38',
|
|
'873cafdd-a581-47f6-8b33-b9696ddb7b05',
|
|
'a74b607e-6bb5-4ea8-8a7c-5d97c7bbcd2a',
|
|
'6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a',
|
|
'da922383-bfa4-4cd3-bbad-6bebab3d7742',
|
|
'2373b66a-092d-44cb-bfd7-82e86e7a3b4d',
|
|
'81c425f5-78f3-4771-afd6-3d2973825947',
|
|
'bb83cab4-e5c7-42c7-a67b-e46068032b86',
|
|
'227d2f97-562b-4ccb-ae47-a5ec9e142fbb',
|
|
'b337d106-053f-4d37-8da0-7f9c4043a66b',
|
|
'20236825-b5df-46e7-89bf-62e1d640a897',
|
|
'8df7e444-8e06-4f93-8a1a-c5c974269d82',
|
|
'aad10cd7-9337-4b62-b704-a857848cedf2',
|
|
'02ce9a50-7af2-47ed-8596-af0c230501f8',
|
|
'4c26e3f3-3a14-4d86-b44a-d3cedebbb487',
|
|
'a503b07f-9119-456b-b75d-f5146737d24f',
|
|
'09a37c51-e625-49f4-a514-20a773797a8a',
|
|
'3e3fc409-bff3-4905-bf16-c968eee3f807',
|
|
'f5c13cdd-205c-4719-a562-348ae5cd1d91',
|
|
'5990457f-2adb-467b-a4af-5c857c92d762',
|
|
'415ef462-bed3-493a-ac36-ca8c6d23bf1b',
|
|
'7ff48d5b-2df2-49da-b500-a5150ffc7f18',
|
|
'9f3bb592-209d-43bc-bb47-d77d9df56504',
|
|
'dd60633f-2c72-42ba-8547-6f2c8cb0fdb0',
|
|
'ce2b64a2-ddc1-4f91-8c7d-a88be7121aac',
|
|
'3f05f3b9-29ba-4b6b-95aa-2204697ffc06',
|
|
'e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56',
|
|
'f8369178-fafe-40c2-adc4-b9b08a125456',
|
|
'778efd0a-153f-4842-9214-f05fc176b877',
|
|
'47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5',
|
|
'c2751594-0cd5-4088-be1b-b5f2f9ec97c4',
|
|
'788b3701-3ec9-4b67-b679-418bfa726c22',
|
|
'48c46dc7-fe04-4505-ade7-723cba1aa6f6',
|
|
'42d25c08-fb87-4927-8b65-93631280a26f',
|
|
'bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108',
|
|
'e8172110-ec08-421b-a6f5-842e6451911f',
|
|
'42f4d1c7-4521-4161-b646-0a8934e36081',
|
|
'3c8f201a-009d-4bbe-8b65-a6f8b35bb57f',
|
|
'd68204bf-11c1-4b13-b48b-d303c73d4bf6',
|
|
'91190194-f406-4cd6-b3f9-c43fac942b22',
|
|
'7f35355e-02a6-45b5-b140-f0be698bcf85',
|
|
'98e8e339-5f91-4ed2-b2b2-12647cb134f4',
|
|
'0e5303d4-8820-42f6-b18d-daf7e633de21',
|
|
'df67aebb-fb3a-44fd-b75b-51b6012df509',
|
|
'5df7b33a-9f77-4101-823e-02f863e1c1ae',
|
|
'aceb0368-56b8-4073-b70e-3dc9aee184e0',
|
|
'22a4636f-8179-4357-8e87-d1743ece1f81',
|
|
'236833a3-5704-47fc-888c-4f298f09f799',
|
|
'67890eb6-6ce5-4c00-9e3d-fb4972699b06',
|
|
]
|
|
|
|
|
|
# for example_id in calc_list:
|
|
# try:
|
|
# with eventlet.Timeout(600, False):
|
|
# main("libreoffice_calc", example_id, gpt4_model="gemini-pro-vision")
|
|
# except Exception as e:
|
|
# logger.error("An error occurred while running the example: %s", e)
|
|
# continue
|
|
|
|
# for example_id in vs_code_list:
|
|
# main("vs_code", example_id, gpt4_model="gemini-pro-vision")
|
|
#
|
|
# for example_id in gimp_list:
|
|
# main("gimp", example_id, gpt4_model="gemini-pro-vision")
|
|
|
|
# todo: specify the class of the example automatically by the example info, rather than hardcoding it
|
|
# Run every Chrome example once per model setting, in this order: first with
# the model name overridden, then with main()'s default model.
chrome_model_overrides = (
    {"gpt4_model": "gemini-pro-vision"},
    {},  # no override: main() falls back to its default model
)
for model_override in chrome_model_overrides:
    for example_id in chrome_list:
        main("chrome", example_id, **model_override)
|