Files
sci-gui-agent-benchmark/gpt_4v_agent_exp.py
2023-12-02 17:52:00 +08:00

80 lines
2.3 KiB
Python

import json
import os
import uuid
from pprint import pprint

from desktop_env.envs.desktop_env import DesktopEnv, Action, MouseClick
from mm_agents.gpt_4v_agent import GPT4v_Agent
def gpt_4v_agent():
    """Run one GPT-4V agent episode against the desktop VM and log it to disk.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable. A fresh directory ``observations/<uuid4>/`` is created and
    receives:

    * ``meta_info.json``   - the task description, as JSON;
    * ``obs_<i>.png``      - a copy of the observation the agent saw at step i;
    * ``actions_<i>.json`` - ``str()`` of the actions predicted at step i.

    The loop continues until ``env.step`` reports ``done``. The environment
    is closed even if the agent or the environment raises mid-episode.
    """
    api_key = os.environ.get("OPENAI_API_KEY")

    meta_info = {
        "instruction": "Open WSJ website to get latest news",
        "task_name": "open_wsj",
        "snapshot_path": "base",
    }
    # Alternative task, kept for quick switching:
    # meta_info = {
    #     "instruction": "Clear the recycle bin",
    #     "task_name": "clean_recycle_bin",
    #     "snapshot_path": "base",
    # }

    agent = GPT4v_Agent(api_key=api_key, instruction=meta_info["instruction"])
    env = DesktopEnv(
        path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Win10\Win10.vmx""",
        # automatically load the snapshot and start the vm
        # path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
        snapshot_path="base",
        # host="192.168.7.128",
        host="http://192.168.13.128:5000",
    )

    try:
        # reset the environment to certain snapshot
        observation = env.reset()
        done = False
        time_idx = 0

        # create a file_dir for this agent
        file_dir = os.path.join("observations", str(uuid.uuid4()))
        os.makedirs(file_dir, exist_ok=True)

        # save the meta_info as real JSON (str(dict) would emit a Python
        # repr with single quotes, which JSON parsers reject)
        meta_path = os.path.join(file_dir, "meta_info.json")
        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump(meta_info, f)
            f.write("\n")

        while not done:
            actions = agent.predict(obs=observation)
            print("Actions:", actions)

            # The observation is used as a path to an image file: copy it
            # into the log directory. The source is opened first so that an
            # unreadable source does not leave an empty obs_<i>.png behind.
            obs_path = os.path.join(file_dir, "obs_{}.png".format(time_idx))
            with open(observation, "rb") as image_file, open(obs_path, "wb") as f:
                f.write(image_file.read())

            # save the actions
            # NOTE(review): written via str() because the action objects'
            # JSON-serialisability cannot be confirmed from this file, so
            # the content is not guaranteed to be valid JSON.
            actions_path = os.path.join(
                file_dir, "actions_{}.json".format(time_idx)
            )
            with open(actions_path, "w", encoding="utf-8") as f:
                f.write(str(actions))
                f.write("\n")

            time_idx += 1
            observation, reward, done, info = env.step(actions)
            print("Observation:", observation)
            print("Reward:", reward)
            print("Info:", info)
            print("================================\n")

            if done:
                # The loop condition ends the episode; no explicit break needed.
                print("The episode is done.")
    finally:
        # Always release the environment, even when a step raised.
        env.close()
        print("Environment closed.")
# Script entry point: run the demo episode only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    gpt_4v_agent()