add wandb settings, remember to set WANDB_KEY
This commit is contained in:
55
run.py
55
run.py
@@ -7,6 +7,7 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import wandb
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
@@ -48,6 +49,11 @@ logger.addHandler(sdebug_handler)
|
||||
|
||||
logger = logging.getLogger("desktopenv.experiment")
|
||||
|
||||
# wandb config
|
||||
### set your wandb api key here
|
||||
os.environ["WANDB_API_KEY"] = ""
|
||||
wandb.login(key=os.environ["WANDB_API_KEY"])
|
||||
|
||||
|
||||
def config() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(
|
||||
@@ -104,6 +110,25 @@ def test(
|
||||
|
||||
# log args
|
||||
logger.info("Args: %s", args)
|
||||
# set wandb project
|
||||
cfg_args = \
|
||||
{
|
||||
"path_to_vm": args.path_to_vm,
|
||||
"headless": args.headless,
|
||||
"action_space": args.action_space,
|
||||
"observation_type": args.observation_type,
|
||||
"screen_width": args.screen_width,
|
||||
"screen_height": args.screen_height,
|
||||
"sleep_after_execution": args.sleep_after_execution,
|
||||
"max_steps": args.max_steps,
|
||||
"max_trajectory_length": args.max_trajectory_length,
|
||||
"model": args.model,
|
||||
"temperature": args.temperature,
|
||||
"top_p": args.top_p,
|
||||
"max_tokens": args.max_tokens,
|
||||
"stop_token": args.stop_token,
|
||||
"result_dir": args.result_dir
|
||||
}
|
||||
|
||||
agent = PromptAgent(
|
||||
model=args.model,
|
||||
@@ -122,6 +147,8 @@ def test(
|
||||
|
||||
for domain in tqdm(test_all_meta, desc="Domain"):
|
||||
for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False):
|
||||
wandb.init(project=f"OSworld-{args.action_space}-{args.observation_type}-{args.model}", group=f"{domain}",
|
||||
name=f"{example_id}")
|
||||
# example setting
|
||||
config_file = os.path.join(args.test_config_base_dir, f"examples/{domain}/{example_id}.json")
|
||||
with open(config_file, "r", encoding="utf-8") as f:
|
||||
@@ -133,6 +160,10 @@ def test(
|
||||
instruction = example["instruction"]
|
||||
|
||||
logger.info(f"[Instruction]: {instruction}")
|
||||
# wandb each example config settings
|
||||
cfg_args["instruction"] = instruction
|
||||
cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S")
|
||||
wandb.config.update(cfg_args)
|
||||
|
||||
example_result_dir = os.path.join(
|
||||
args.result_dir,
|
||||
@@ -148,13 +179,20 @@ def test(
|
||||
lib_run_single.run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir,
|
||||
scores)
|
||||
except Exception as e:
|
||||
logger.error(f"Exception in {domain}/{example_id}: {e}")
|
||||
wandb.log({"Exception": wandb.Table(data=[[f"Exception in {domain}/{example_id}: {e}"]], columns=["Error"])})
|
||||
env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
|
||||
logger.error(f"Time limit exceeded in {domain}/{example_id}")
|
||||
with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
|
||||
f.write(json.dumps({
|
||||
"Error": f"Time limit exceeded in {domain}/{example_id}"
|
||||
}))
|
||||
f.write("\n")
|
||||
# wandb settings
|
||||
os.mkdir(os.path.join(wandb.run.dir, "results/"))
|
||||
for file in os.listdir(example_result_dir):
|
||||
# move file to just under the root dir
|
||||
os.rename(os.path.join(example_result_dir, file), os.path.join(wandb.run.dir, f"./results/{file}"))
|
||||
wandb.finish()
|
||||
|
||||
env.close()
|
||||
logger.info(f"Average score: {sum(scores) / len(scores)}")
|
||||
@@ -235,11 +273,10 @@ if __name__ == '__main__':
|
||||
left_info += f"{domain}: {len(test_file_list[domain])}\n"
|
||||
logger.info(f"Left tasks:\n{left_info}")
|
||||
|
||||
get_result(args.action_space,
|
||||
args.model,
|
||||
args.observation_type,
|
||||
args.result_dir,
|
||||
test_all_meta
|
||||
)
|
||||
|
||||
# test(args, test_all_meta)
|
||||
# get_result(args.action_space,
|
||||
# args.model,
|
||||
# args.observation_type,
|
||||
# args.result_dir,
|
||||
# test_all_meta
|
||||
# )
|
||||
test(args, test_file_list)
|
||||
|
||||
Reference in New Issue
Block a user