diff --git a/conf_my_program.py b/conf_my_program.py new file mode 100644 index 0000000..2102184 --- /dev/null +++ b/conf_my_program.py @@ -0,0 +1,7 @@ +# conf_my_program.py: + +class ConfMyProgram(object): + def __init__(self): + self.name:str = 'my_var_name' + +conf_my_program = ConfMyProgram() \ No newline at end of file diff --git a/demo.py b/demo.py index 9b0bb06..cce0188 100644 --- a/demo.py +++ b/demo.py @@ -1,24 +1,16 @@ -import concurrent.futures -import time +# my_program_main.py: -# Define the function you want to run with a timeout -def my_task(): - print("Task started") - # Simulate a long-running task - time.sleep(5) - print("Task completed") - return "Task result" +import lib_test -# Main program def main(): - with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(my_task) - try: - # Wait for 2 seconds for my_task to complete - result = future.result(timeout=2) - print(f"Task completed with result: {result}") - except concurrent.futures.TimeoutError: - print("Task did not complete in time") + try: + while True: + print(1) + lib_test.mytest() + # try: + # lib_test.mytest() + except Exception as e: + print(e) -if __name__ == "__main__": +if __name__ == '__main__': main() \ No newline at end of file diff --git a/lib_run_single.py b/lib_run_single.py new file mode 100644 index 0000000..4d2f942 --- /dev/null +++ b/lib_run_single.py @@ -0,0 +1,60 @@ +import os +import datetime +import json +from wrapt_timeout_decorator import * +import logging +logger = logging.getLogger("desktopenv.experiment") + +@timeout(60, use_signals=False) +def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores): + agent.reset() + obs = env.reset(task_config=example) + done = False + step_idx = 0 + env.controller.start_recording() + + while not done and step_idx < max_steps: + actions = agent.predict( + instruction, + obs + ) + for action in actions: + # Capture the timestamp before executing 
the action + action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") + logger.info("Step %d: %s", step_idx + 1, action) + + obs, reward, done, info = env.step(action, args.sleep_after_execution) + + logger.info("Reward: %.2f", reward) + logger.info("Done: %s", done) + logger.info("Info: %s", info) + + # Save screenshot and trajectory information + with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), + "wb") as _f: + with open(obs['screenshot'], "rb") as __f: + screenshot = __f.read() + _f.write(screenshot) + + with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: + f.write(json.dumps({ + "step_num": step_idx + 1, + "action_timestamp": action_timestamp, + "action": action, + "reward": reward, + "done": done, + "info": info, + "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png" + })) + f.write("\n") + + if done: + logger.info("The episode is done.") + break + step_idx += 1 + result = env.evaluate() + logger.info("Result: %.2f", result) + scores.append(result) + with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: + f.write(f"{result}\n") + env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) diff --git a/lib_test.py b/lib_test.py new file mode 100644 index 0000000..75cfc82 --- /dev/null +++ b/lib_test.py @@ -0,0 +1,15 @@ +# lib_test.py: + +from wrapt_timeout_decorator import * +from time import sleep +from conf_my_program import conf_my_program + +# use_signals = False is not really necessary here, it is set automatically under Windows +# but You can force NOT to use Signals under Linux +@timeout(5, use_signals=False) +def mytest(): + print("Start ", conf_my_program.name) + for i in range(1,10): + sleep(1) + print("{} seconds have passed".format(i)) + return i \ No newline at end of file diff --git a/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/recording.mp4 
b/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/recording.mp4 new file mode 100644 index 0000000..3e4722d Binary files /dev/null and b/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/recording.mp4 differ diff --git a/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/step_1_20240316@115041.png b/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/step_1_20240316@115041.png new file mode 100644 index 0000000..02b9dcd Binary files /dev/null and b/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/step_1_20240316@115041.png differ diff --git a/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/step_2_20240316@115102.png b/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/step_2_20240316@115102.png new file mode 100644 index 0000000..02b9dcd Binary files /dev/null and b/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/step_2_20240316@115102.png differ diff --git a/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/traj.jsonl b/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/traj.jsonl new file mode 100644 index 0000000..4df986d --- /dev/null +++ b/results/pyautogui/som/gpt-4-vision-preview/chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3/traj.jsonl @@ -0,0 +1,3 @@ +{"step_num": 1, "action_timestamp": "20240316@115041", "action": "tag_1=(1212, 753)\ntag_2=(302, 81)\ntag_3=(541, 81)\ntag_4=(583, 81)\ntag_5=(1248, 81)\ntag_6=(156, 119)\ntag_7=(196, 119)\ntag_8=(642, 119)\ntag_9=(1090, 119)\ntag_10=(1122, 119)\ntag_11=(1162, 119)\ntag_12=(1194, 119)\ntag_13=(1226, 119)\ntag_14=(1262, 119)\ntag_15=(675, 486)\ntag_16=(147, 202)\ntag_17=(682, 201)\ntag_18=(672, 201)\ntag_19=(1078, 201)\ntag_20=(667, 
638)\ntag_21=(667, 421)\ntag_22=(667, 420)\ntag_23=(667, 419)\ntag_24=(667, 798)\ntag_25=(667, 797)\npyautogui.click(tag_15)", "reward": 0, "done": false, "info": {}, "screenshot_file": "step_1_20240316@115041.png"} +{"step_num": 2, "action_timestamp": "20240316@115102", "action": "tag_1=(1212, 753)\ntag_2=(302, 81)\ntag_3=(541, 81)\ntag_4=(583, 81)\ntag_5=(1248, 81)\ntag_6=(156, 119)\ntag_7=(196, 119)\ntag_8=(642, 119)\ntag_9=(1090, 119)\ntag_10=(1122, 119)\ntag_11=(1162, 119)\ntag_12=(1194, 119)\ntag_13=(1226, 119)\ntag_14=(1262, 119)\ntag_15=(675, 486)\ntag_16=(667, 322)\ntag_17=(688, 294)\ntag_18=(686, 291)\ntag_19=(688, 335)\ntag_20=(686, 335)\ntag_21=(667, 558)\ntag_22=(667, 545)\ntag_23=(667, 518)\ntag_24=(667, 451)\ntag_25=(654, 449)\ntag_26=(667, 509)\ntag_27=(733, 554)\ntag_28=(742, 554)\ntag_29=(742, 554)\ntag_30=(667, 579)\ntag_31=(667, 661)\ntag_32=(667, 660)\ntag_33=(667, 802)\ntag_34=(617, 801)\ntag_35=(617, 801)\ntag_36=(727, 801)\ntag_37=(727, 801)\n# Estimating the position of the browser's menu button\nmenu_x = tag_16[0] + (tag_18[0] - tag_16[0]) - 50 # 50 pixels to the left from the right end of the address bar\nmenu_y = tag_16[1] + (tag_19[1] - tag_16[1]) / 2 # Vertically centered between the top of the address bar and the bottom\npyautogui.click(menu_x, menu_y)", "reward": 0, "done": false, "info": {}, "screenshot_file": "step_2_20240316@115102.png"} +{"Error": "Time limit exceeded in chrome/7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"} diff --git a/results/pyautogui/som/gpt-4-vision-preview/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4/traj.jsonl b/results/pyautogui/som/gpt-4-vision-preview/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4/traj.jsonl new file mode 100644 index 0000000..a85797b --- /dev/null +++ b/results/pyautogui/som/gpt-4-vision-preview/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4/traj.jsonl @@ -0,0 +1 @@ +{"Error": "Time limit exceeded in chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4"} diff --git a/run.py b/run.py index 0eb5116..23b2169 
100644 --- a/run.py +++ b/run.py @@ -8,13 +8,13 @@ import logging import os import sys -from tqdm # import tqdm +from tqdm import tqdm import time -import timeout_decorator +# import timeout_decorator from desktop_env.envs.desktop_env import DesktopEnv from mm_agents.agent import PromptAgent - +import lib_run_single # Logger Configs {{{ # logger = logging.getLogger() logger.setLevel(logging.DEBUG) @@ -49,12 +49,6 @@ logger.addHandler(sdebug_handler) logger = logging.getLogger("desktopenv.experiment") - -# make sure each example won't exceed the time limit -# def handler(signo, frame): -# raise RuntimeError("Time limit exceeded!") -# signal.signal(signal.SIGALRM, handler) - def config() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Run end-to-end evaluation on the benchmark" @@ -151,80 +145,17 @@ def test( example_id ) os.makedirs(example_result_dir, exist_ok=True) - - - @timeout_decorator.timeout(seconds=time_limit, timeout_exception=RuntimeError, exception_message="Time limit exceeded.") - def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores): - agent.reset() - obs = env.reset(task_config=example) - done = False - step_idx = 0 - env.controller.start_recording() - - while not done and step_idx < max_steps: - actions = agent.predict( - instruction, - obs - ) - for action in actions: - # Capture the timestamp before executing the action - action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S") - logger.info("Step %d: %s", step_idx + 1, action) - - obs, reward, done, info = env.step(action, args.sleep_after_execution) - - logger.info("Reward: %.2f", reward) - logger.info("Done: %s", done) - logger.info("Info: %s", info) - - # Save screenshot and trajectory information - with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), - "wb") as _f: - with open(obs['screenshot'], "rb") as __f: - screenshot = __f.read() - _f.write(screenshot) - - with 
open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: - f.write(json.dumps({ - "step_num": step_idx + 1, - "action_timestamp": action_timestamp, - "action": action, - "reward": reward, - "done": done, - "info": info, - "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png" - })) - f.write("\n") - - if done: - logger.info("The episode is done.") - break - step_idx += 1 - - result = env.evaluate() - logger.info("Result: %.2f", result) - scores.append(result) - with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f: - f.write(f"{result}\n") - env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) - # example start running try: - # signal.alarm(time_limit) - run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores) - except RuntimeError as e: - logger.error(f"Error in example {domain}/{example_id}: {e}") - # save info of this example and then continue + lib_run_single.run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores) + except Exception as e: env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4")) + logger.error(f"Time limit exceeded in {domain}/{example_id}") with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f: f.write(json.dumps({ - "Error": f"Error in example {domain}/{example_id}: {e}" + "Error": f"Time limit exceeded in {domain}/{example_id}" })) - f.write("\n") - continue - except Exception as e: - logger.error(f"Error in example {domain}/{example_id}: {e}") - continue + f.write("\n") env.close() logger.info(f"Average score: {sum(scores) / len(scores)}") @@ -281,5 +212,5 @@ if __name__ == '__main__': for domain in test_file_list: left_info += f"{domain}: {len(test_file_list[domain])}\n" logger.info(f"Left tasks:\n{left_info}") - + os.environ["OPENAI_API_KEY"] = "REDACTED-NEVER-COMMIT-SECRETS-ROTATE-THIS-KEY" test(args, test_all_meta)