add wandb settings, remember to set WANDB_API_KEY

2024-03-17 22:30:29 +08:00
parent 7feeab8f6b
commit 48aedb09a7
5 changed files with 73 additions and 29 deletions
--- a/run.py
+++ b/run.py
@@ -7,6 +7,7 @@ import json
 import logging
 import os
 import sys
+import wandb

 from tqdm import tqdm

@@ -48,6 +49,11 @@ logger.addHandler(sdebug_handler)

 logger = logging.getLogger("desktopenv.experiment")

+# wandb config
+# Set your wandb API key here; do not commit a real key.
+os.environ["WANDB_API_KEY"] = ""
+wandb.login(key=os.environ["WANDB_API_KEY"])
+

 def config() -> argparse.Namespace:
    parser = argparse.ArgumentParser(
@@ -104,6 +110,25 @@ def test(

    # log args
    logger.info("Args: %s", args)
+    # collect the run settings that are recorded in the wandb config
+    cfg_args = \
+    {
+        "path_to_vm": args.path_to_vm,
+        "headless": args.headless,
+        "action_space": args.action_space,
+        "observation_type": args.observation_type,
+        "screen_width": args.screen_width,
+        "screen_height": args.screen_height,
+        "sleep_after_execution": args.sleep_after_execution,
+        "max_steps": args.max_steps,
+        "max_trajectory_length": args.max_trajectory_length,
+        "model": args.model,
+        "temperature": args.temperature,
+        "top_p": args.top_p,
+        "max_tokens": args.max_tokens,
+        "stop_token": args.stop_token,
+        "result_dir": args.result_dir
+    }

    agent = PromptAgent(
        model=args.model,
@@ -122,6 +147,8 @@ def test(

    for domain in tqdm(test_all_meta, desc="Domain"):
        for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False):
+            wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}", 
+                    name=f"{example_id}")
            # example setting
            config_file = os.path.join(args.test_config_base_dir, f"examples/{domain}/{example_id}.json")
            with open(config_file, "r", encoding="utf-8") as f:
@@ -133,6 +160,10 @@ def test(
            instruction = example["instruction"]

            logger.info(f"[Instruction]: {instruction}")
+            # wandb each example config settings
+            cfg_args["instruction"] = instruction
+            cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S")
+            wandb.config.update(cfg_args)   

            example_result_dir = os.path.join(
                args.result_dir,
@@ -148,13 +179,20 @@ def test(
                lib_run_single.run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir,
                                                  scores)
            except Exception as e:
+                logger.error(f"Exception in {domain}/{example_id}: {e}")
+                wandb.log({"Exception": wandb.Table(data=[[f"Exception in {domain}/{example_id}: {e}"]], columns=["Error"])})
                env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
-                logger.error(f"Time limit exceeded in {domain}/{example_id}")
                with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                    f.write(json.dumps({
                        "Error": f"Time limit exceeded in {domain}/{example_id}"
                    }))
                    f.write("\n")
+            # move this example's result files into results/ under the wandb run dir
+            os.mkdir(os.path.join(wandb.run.dir, "results/"))
+            for file in os.listdir(example_result_dir):
+                # move the file into the results/ directory inside the wandb run dir
+                os.rename(os.path.join(example_result_dir, file), os.path.join(wandb.run.dir, f"./results/{file}"))
+            wandb.finish()

    env.close()
    logger.info(f"Average score: {sum(scores) / len(scores)}")
@@ -235,11 +273,10 @@ if __name__ == '__main__':
        left_info += f"{domain}: {len(test_file_list[domain])}\n"
    logger.info(f"Left tasks:\n{left_info}")

-    get_result(args.action_space,
-        args.model,
-        args.observation_type,
-        args.result_dir,
-        test_all_meta
-    )
-
-    # test(args, test_all_meta)
+    # get_result(args.action_space,
+    #     args.model,
+    #     args.observation_type,
+    #     args.result_dir,
+    #     test_all_meta
+    # )
+    test(args, test_file_list)