Improve agent code; add code for auto-running experiments; fix some examples

This commit is contained in:
Timothyxxx
2024-01-27 19:47:47 +08:00
parent f8ff612b85
commit 909aa868f3
8 changed files with 283 additions and 56 deletions

View File

@@ -3,10 +3,12 @@ import json
import logging
import os
import sys
import threading
import time
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
from mm_agents.gemini_pro_agent import GeminiPro_Agent
# from mm_agents.gemini_pro_agent import GeminiPro_Agent
# Logger Configs {{{ #
logger = logging.getLogger()
@@ -98,21 +100,50 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
logger.info("The episode is done.")
break
if recording:
# send a request to the server to stop recording
env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
def stop_recording():
    """Ask the controller to end the recording and save it as recording.mp4.

    Intended to run on a worker thread. Any failure is reported and
    swallowed on purpose: a broken recording must not abort the episode
    before it is evaluated.
    """
    try:
        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
    except Exception as e:
        print(f"An error occurred while stopping the recording: {e}")

# Pass the function object itself as the target. Writing `stop_recording()`
# here would run the request synchronously on the main thread (and hand
# `None` to Thread), so the timeout below could never take effect.
# The thread is a daemon so that a request that never returns cannot keep
# the interpreter alive at exit.
recording_thread = threading.Thread(target=stop_recording, daemon=True)
recording_thread.start()

# Wait for the recording to be saved, but for at most `timeout` seconds.
timeout = 60  # seconds
recording_thread.join(timeout)

# threading.Thread has no kill(): a Python thread cannot be terminated from
# outside. If the request is still pending, report it and carry on; the
# daemon thread is simply left behind.
if recording_thread.is_alive():
    print("Timeout reached. Stopping recording.")
result = env.evaluate()
logger.info("Result: %.2f", result)
with open(trajectory_recording_path, "a") as f:
f.write(json.dumps({
"result": result
}))
f.write("\n")
# env.close()
logger.info("Environment closed.")
if __name__ == "__main__":
def main(example_class, example_id):
action_space = "pyautogui"
example_class = "thunderbird"
example_id = "bb5e4c0d-f964-439c-97b6-bdb9747de3f4"
gpt4_model = "gpt-4-vision-preview"
gemini_model = "gemini-pro-vision"
@@ -120,21 +151,28 @@ if __name__ == "__main__":
logger.info("Using model %s", gpt4_model)
# logger.info("Using model %s", gemini_model)
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f)
example["snapshot"] = "exp_setup2"
example["snapshot"] = "exp_chrome"
# api_key = os.environ.get("OPENAI_API_KEY")
# agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
api_key = os.environ.get("GENAI_API_KEY")
agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4v_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
#
# api_key = os.environ.get("GENAI_API_KEY")
# agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")
root_trajectory_dir = "exp_trajectory"
example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gpt4_model, example_id)
# example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gemini_model, example_id)
example_trajectory_dir = os.path.join(root_trajectory_dir, "screenshot", example_class, gpt4_model, example_id)
# example_trajectory_dir = os.path.join(root_trajectory_dir, "screenshot", example_class, gemini_model, example_id)
os.makedirs(example_trajectory_dir, exist_ok=True)
run_one_example(example, agent, 15, example_trajectory_dir)
if __name__ == '__main__':
    # Ids of the examples to run in this batch. The list is empty here, so
    # running the script as-is executes no example.
    xx_list = []

    # Every id in the batch is run under the "xx" example class.
    for example_id in xx_list:
        main("xx", example_id)