Run through gpt_4v agent pipeline

2023-11-29 20:21:57 +08:00
parent 28c6edd6b3
commit 3d0d9d7758
8 changed files with 135 additions and 47 deletions
--- a/gpt_4v_agent_exp.py
+++ b/gpt_4v_agent_exp.py
@@ -0,0 +1,47 @@
+import os
+from pprint import pprint
+from desktop_env.envs.desktop_env import DesktopEnv, Action, MouseClick
+from mm_agents.gpt_4v_agent import GPT4v_Agent
+
+
+def gpt_4v_agent():
+    """Run one GPT-4V agent episode against the desktop VM environment.
+
+    Builds a ``GPT4v_Agent`` with the API key read from the
+    ``OPENAI_API_KEY`` environment variable, resets a ``DesktopEnv`` and
+    then alternates ``agent.predict`` / ``env.step`` until the environment
+    reports ``done``. The chosen action and the step results are printed
+    at every step.
+
+    The environment is closed in a ``finally`` clause, so ``env.close()``
+    still runs when the reset, the agent, or a step raises; the exception
+    itself is propagated to the caller.
+    """
+    # ``os.environ.get`` yields None when the variable is unset; that value
+    # is handed to the agent unchanged.
+    api_key = os.environ.get("OPENAI_API_KEY")
+    agent = GPT4v_Agent(api_key=api_key, instruction="Clear the recycle bin.")
+
+    # NOTE(review): the VM path, host and credentials below are hard-coded
+    # for one machine -- they probably belong in configuration, not source.
+    env = DesktopEnv(
+        path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Win10\Win10.vmx""", # automatically load the snapshot and start the vm
+        #  path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
+        username="tianbaox",
+        password="951753",
+        #  host="192.168.7.128",
+        host="http://192.168.13.128:5000",
+        vm_os="windows"
+    )
+
+    try:
+        # reset the environment to certain snapshot
+        observation = env.reset()
+        done = False
+
+        while not done:
+            # todo: action needs to be redesigned, need to support multiple actions at one step
+            action = agent.predict(obs=observation)
+            print("Action:", action)
+
+            # fixme: step not working
+            observation, reward, done, info = env.step(action)
+            print("Observation:", observation)
+            print("Reward:", reward)
+            print("Info:", info)
+
+            print("================================\n")
+
+        # The loop only exits once ``done`` is truthy, so this is printed
+        # exactly when the episode has finished.
+        print("The episode is done.")
+    finally:
+        # Always release the environment, including on failure.
+        env.close()
+        print("Environment closed.")
+
+
+# Run the experiment only when this file is executed as a script.
+if __name__ == "__main__":
+    gpt_4v_agent()