Improve agent code; add code for auto-running experiments; fix some examples

This commit is contained in:
Timothyxxx
2024-01-27 19:47:47 +08:00
parent f8ff612b85
commit 909aa868f3
8 changed files with 283 additions and 56 deletions

View File

@@ -3,6 +3,8 @@ import json
import logging
import os
import sys
import threading
import time
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
@@ -96,27 +98,56 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
logger.info("The episode is done.")
break
if recording:
# send a request to the server to stop recording
env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
def stop_recording():
    """Ask the controller to end the recording and save it as recording.mp4.

    Best-effort: any failure is reported and swallowed so that a recording
    problem does not abort the steps that follow.
    """
    try:
        env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
    except Exception as e:
        print(f"An error occurred while stopping the recording: {e}")


# Run `stop_recording` in a separate thread so a hung request cannot block forever.
# The callable itself is passed as `target`; writing `stop_recording()` here would
# execute it synchronously in the main thread and hand `None` to the Thread.
# daemon=True: Python threads cannot be killed, so a thread that never returns
# must not keep the interpreter alive at exit.
recording_thread = threading.Thread(target=stop_recording, daemon=True)
recording_thread.start()

# Wait until the recording is saved, but no longer than the timeout.
timeout = 60  # seconds
recording_thread.join(timeout)
if recording_thread.is_alive():
    # threading.Thread has no kill(); give up waiting and carry on.
    print("Timeout reached. Stopping recording.")
result = env.evaluate()
logger.info("Result: %.2f", result)
with open(trajectory_recording_path, "a") as f:
f.write(json.dumps({
"result": result
}))
f.write("\n")
# env.close()
logger.info("Environment closed.")
if __name__ == "__main__":
def main(example_class, example_id):
action_space = "pyautogui"
example_class = "chrome"
example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
gpt4_model = "gpt-4-vision-preview"
gemini_model = "gemini-pro-vision"
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r") as f:
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f)
example["snapshot"] = "exp_setup4"
example["snapshot"] = "exp_chrome"
api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
@@ -133,3 +164,10 @@ if __name__ == "__main__":
os.makedirs(example_trajectory_dir, exist_ok=True)
run_one_example(example, agent, 15, example_trajectory_dir)
if __name__ == '__main__':
    # Batch driver: every example id listed here is run through `main` in order.
    # The list is empty as committed, so running the script does nothing until
    # ids are filled in.
    xx_list = []
    for example_id in xx_list:
        main("xx", example_id)