Merge
This commit is contained in:
17
run.py
17
run.py
@@ -6,8 +6,8 @@ import datetime
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
import signal
|
import signal
|
||||||
|
import sys
|
||||||
|
|
||||||
from desktop_env.envs.desktop_env import DesktopEnv
|
from desktop_env.envs.desktop_env import DesktopEnv
|
||||||
from mm_agents.agent import PromptAgent
|
from mm_agents.agent import PromptAgent
|
||||||
@@ -46,11 +46,15 @@ logger.addHandler(sdebug_handler)
|
|||||||
|
|
||||||
logger = logging.getLogger("desktopenv.experiment")
|
logger = logging.getLogger("desktopenv.experiment")
|
||||||
|
|
||||||
|
|
||||||
# make sure each example won't exceed the time limit
|
# make sure each example won't exceed the time limit
|
||||||
def handler(signo, frame):
|
def handler(signo, frame):
|
||||||
raise RuntimeError("Time limit exceeded!")
|
raise RuntimeError("Time limit exceeded!")
|
||||||
|
|
||||||
|
|
||||||
signal.signal(signal.SIGALRM, handler)
|
signal.signal(signal.SIGALRM, handler)
|
||||||
|
|
||||||
|
|
||||||
def config() -> argparse.Namespace:
|
def config() -> argparse.Namespace:
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description="Run end-to-end evaluation on the benchmark"
|
description="Run end-to-end evaluation on the benchmark"
|
||||||
@@ -175,7 +179,7 @@ def test(
|
|||||||
|
|
||||||
# Save screenshot and trajectory information
|
# Save screenshot and trajectory information
|
||||||
with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
|
with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
|
||||||
"wb") as _f:
|
"wb") as _f:
|
||||||
with open(obs['screenshot'], "rb") as __f:
|
with open(obs['screenshot'], "rb") as __f:
|
||||||
screenshot = __f.read()
|
screenshot = __f.read()
|
||||||
_f.write(screenshot)
|
_f.write(screenshot)
|
||||||
@@ -245,6 +249,7 @@ def get_unfinished(action_space, use_model, observation_type, result_dir, total_
|
|||||||
|
|
||||||
return total_file_json
|
return total_file_json
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
####### The complete version of the list of examples #######
|
####### The complete version of the list of examples #######
|
||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
@@ -253,7 +258,13 @@ if __name__ == '__main__':
|
|||||||
with open("evaluation_examples/test_all.json", "r", encoding="utf-8") as f:
|
with open("evaluation_examples/test_all.json", "r", encoding="utf-8") as f:
|
||||||
test_all_meta = json.load(f)
|
test_all_meta = json.load(f)
|
||||||
|
|
||||||
test_file_list = get_unfinished(args.action_space, args.model, args.observation_type, args.result_dir, test_all_meta)
|
test_file_list = get_unfinished(
|
||||||
|
args.action_space,
|
||||||
|
args.model,
|
||||||
|
args.observation_type,
|
||||||
|
args.result_dir,
|
||||||
|
test_all_meta
|
||||||
|
)
|
||||||
left_info = ""
|
left_info = ""
|
||||||
for domain in test_file_list:
|
for domain in test_file_list:
|
||||||
left_info += f"{domain}: {len(test_file_list[domain])}\n"
|
left_info += f"{domain}: {len(test_file_list[domain])}\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user