feat: enhance run_coact.py with logging and configuration options
- Added logging configuration to capture runtime logs in both file and console with adjustable log levels.
- Introduced new command-line arguments for provider name, region, and client password to improve flexibility and security.
- Updated process_task function to accommodate new parameters, ensuring compatibility with existing logic.
- Modified prompt templates in coding_agent.py and cua_agent.py to use the client password placeholder for enhanced security.
This commit is contained in:
@@ -16,7 +16,7 @@ You can write code in ```bash...``` code blocks for bash scripts, and ```python.
|
|||||||
- When you write code, you must identify the language (whether it is python or bash) of the code.
|
- When you write code, you must identify the language (whether it is python or bash) of the code.
|
||||||
- Your linux username is "user".
|
- Your linux username is "user".
|
||||||
- Wrap all your code in ONE code block. DO NOT let user save the code as a file and execute it for you.
|
- Wrap all your code in ONE code block. DO NOT let user save the code as a file and execute it for you.
|
||||||
- If you want to use sudo, follow the format: "echo password | sudo -S [YOUR COMMANDS]" (no quotes for the word "password").
|
- If you want to use sudo, follow the format: "echo {CLIENT_PASSWORD} | sudo -S [YOUR COMMANDS]" (no quotes for the word "{CLIENT_PASSWORD}").
|
||||||
- Ignore the error: "sudo: /etc/sudoers.d is world writable".
|
- Ignore the error: "sudo: /etc/sudoers.d is world writable".
|
||||||
- Your python code will be sent line-by-line into a interactive python terminal. Do not include __main__ in your code.
|
- Your python code will be sent line-by-line into a interactive python terminal. Do not include __main__ in your code.
|
||||||
- When import a package, you need to check if the package is installed. If not, you need to install it yourself.
|
- When import a package, you need to check if the package is installed. If not, you need to install it yourself.
|
||||||
|
|||||||
@@ -18,8 +18,8 @@ PROMPT_TEMPLATE = """# Task
|
|||||||
{instruction}
|
{instruction}
|
||||||
|
|
||||||
# Hints
|
# Hints
|
||||||
- Sudo password is "password".
|
- Sudo password is "{CLIENT_PASSWORD}".
|
||||||
- If you meet "Authentication required" dialog, enter the "password" to continue.
|
- If you meet "Authentication required" dialog, enter the "{CLIENT_PASSWORD}" to continue.
|
||||||
- Do not close the any application or window or tab that is already opened.
|
- Do not close the any application or window or tab that is already opened.
|
||||||
- Do not close the window at the end of the task.
|
- Do not close the window at the end of the task.
|
||||||
- If you have completed the user task, reply with the information you want the user to know along with 'TERMINATE'.
|
- If you have completed the user task, reply with the information you want the user to know along with 'TERMINATE'.
|
||||||
@@ -154,6 +154,7 @@ def run_cua(
|
|||||||
screen_height: int = 1080,
|
screen_height: int = 1080,
|
||||||
sleep_after_execution: float = 0.3,
|
sleep_after_execution: float = 0.3,
|
||||||
truncate_history_inputs: int = 100,
|
truncate_history_inputs: int = 100,
|
||||||
|
client_password: str = "",
|
||||||
) -> Tuple[str, float]:
|
) -> Tuple[str, float]:
|
||||||
client = OpenAI()
|
client = OpenAI()
|
||||||
|
|
||||||
@@ -166,7 +167,7 @@ def run_cua(
|
|||||||
history_inputs = [{
|
history_inputs = [{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": [
|
||||||
{"type": "input_text", "text": PROMPT_TEMPLATE.format(instruction=instruction)},
|
{"type": "input_text", "text": PROMPT_TEMPLATE.format(instruction=instruction, CLIENT_PASSWORD=client_password)},
|
||||||
{"type": "input_image", "image_url": f"data:image/png;base64,{screenshot_b64}"},
|
{"type": "input_image", "image_url": f"data:image/png;base64,{screenshot_b64}"},
|
||||||
],
|
],
|
||||||
}]
|
}]
|
||||||
|
|||||||
88
run_coact.py
88
run_coact.py
@@ -1,6 +1,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import base64
|
import base64
|
||||||
import glob
|
import glob
|
||||||
|
import datetime
|
||||||
import shutil
|
import shutil
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
@@ -11,9 +12,7 @@ from mm_agents.coact.autogen import LLMConfig
|
|||||||
import logging
|
import logging
|
||||||
from multiprocessing import Pool, cpu_count
|
from multiprocessing import Pool, cpu_count
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
import sys
|
||||||
|
|
||||||
logger = logging.getLogger("desktopenv")
|
|
||||||
|
|
||||||
|
|
||||||
TASK_DESCRIPTION = """# Your role
|
TASK_DESCRIPTION = """# Your role
|
||||||
@@ -50,10 +49,13 @@ def config() -> argparse.Namespace:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# environment config
|
# environment config
|
||||||
parser.add_argument("--path_to_vm", type=str, default=os.environ["VMS_DIR"] + "/Ubuntu.qcow2")
|
parser.add_argument("--path_to_vm", type=str, default=None)
|
||||||
|
parser.add_argument("--provider_name", type=str, default="docker")
|
||||||
parser.add_argument("--screen_width", type=int, default=1920)
|
parser.add_argument("--screen_width", type=int, default=1920)
|
||||||
parser.add_argument("--screen_height", type=int, default=1080)
|
parser.add_argument("--screen_height", type=int, default=1080)
|
||||||
parser.add_argument("--sleep_after_execution", type=float, default=0.5)
|
parser.add_argument("--sleep_after_execution", type=float, default=0.5)
|
||||||
|
parser.add_argument("--region", type=str, default="us-east-1")
|
||||||
|
parser.add_argument("--client_password", type=str, default="")
|
||||||
|
|
||||||
# agent config
|
# agent config
|
||||||
parser.add_argument("--oai_config_path", type=str, default="OAI_CONFIG_LIST")
|
parser.add_argument("--oai_config_path", type=str, default="OAI_CONFIG_LIST")
|
||||||
@@ -77,24 +79,67 @@ def config() -> argparse.Namespace:
|
|||||||
# logging related
|
# logging related
|
||||||
parser.add_argument("--result_dir", type=str, default="./results")
|
parser.add_argument("--result_dir", type=str, default="./results")
|
||||||
parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to run in parallel")
|
parser.add_argument("--num_envs", type=int, default=1, help="Number of environments to run in parallel")
|
||||||
|
parser.add_argument("--log_level", type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
|
||||||
|
default='INFO', help="Set the logging level")
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
args = config()
|
||||||
|
|
||||||
|
logger = logging.getLogger()
|
||||||
|
|
||||||
|
log_level = getattr(logging, args.log_level.upper())
|
||||||
|
logger.setLevel(log_level)
|
||||||
|
|
||||||
|
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
|
||||||
|
|
||||||
|
file_handler = logging.FileHandler(
|
||||||
|
os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8"
|
||||||
|
)
|
||||||
|
debug_handler = logging.FileHandler(
|
||||||
|
os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8"
|
||||||
|
)
|
||||||
|
stdout_handler = logging.StreamHandler(sys.stdout)
|
||||||
|
|
||||||
|
file_handler.setLevel(logging.INFO)
|
||||||
|
debug_handler.setLevel(logging.DEBUG)
|
||||||
|
stdout_handler.setLevel(log_level)
|
||||||
|
|
||||||
|
formatter = logging.Formatter(
|
||||||
|
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s"
|
||||||
|
)
|
||||||
|
file_handler.setFormatter(formatter)
|
||||||
|
debug_handler.setFormatter(formatter)
|
||||||
|
stdout_handler.setFormatter(formatter)
|
||||||
|
|
||||||
|
stdout_handler.addFilter(logging.Filter("desktopenv"))
|
||||||
|
|
||||||
|
logger.addHandler(file_handler)
|
||||||
|
logger.addHandler(debug_handler)
|
||||||
|
logger.addHandler(stdout_handler)
|
||||||
|
# }}} Logger Configs #
|
||||||
|
|
||||||
|
logger = logging.getLogger("desktopenv.expeiment")
|
||||||
|
|
||||||
|
|
||||||
def process_task(task_info,
|
def process_task(task_info,
|
||||||
path_to_vm,
|
provider_name,
|
||||||
orchestrator_model="o3",
|
path_to_vm,
|
||||||
coding_model='o4-mini',
|
orchestrator_model="o3",
|
||||||
save_dir='results',
|
coding_model='o4-mini',
|
||||||
orchestrator_max_steps=15,
|
save_dir='results',
|
||||||
cua_max_steps=25,
|
orchestrator_max_steps=15,
|
||||||
coding_max_steps=20,
|
cua_max_steps=25,
|
||||||
cut_off_steps=150,
|
coding_max_steps=20,
|
||||||
screen_width=1920,
|
cut_off_steps=150,
|
||||||
screen_height=1080,
|
screen_width=1920,
|
||||||
sleep_after_execution=0.5,
|
screen_height=1080,
|
||||||
config_path="OAI_CONFIG_LIST"):
|
sleep_after_execution=0.5,
|
||||||
|
config_path="OAI_CONFIG_LIST",
|
||||||
|
region="us-east-1",
|
||||||
|
client_password="",
|
||||||
|
):
|
||||||
"""Worker function to process a single task"""
|
"""Worker function to process a single task"""
|
||||||
domain, ex_id, cfg = task_info
|
domain, ex_id, cfg = task_info
|
||||||
|
|
||||||
@@ -118,6 +163,7 @@ def process_task(task_info,
|
|||||||
name="orchestrator_proxy",
|
name="orchestrator_proxy",
|
||||||
is_termination_msg=lambda x: x.get("content", "") and ("terminate" in x.get("content", "")[0]["text"].lower() or "infeasible" in x.get("content", "")[0]["text"].lower()),
|
is_termination_msg=lambda x: x.get("content", "") and ("terminate" in x.get("content", "")[0]["text"].lower() or "infeasible" in x.get("content", "")[0]["text"].lower()),
|
||||||
human_input_mode="NEVER",
|
human_input_mode="NEVER",
|
||||||
|
provider_name=provider_name,
|
||||||
path_to_vm=path_to_vm,
|
path_to_vm=path_to_vm,
|
||||||
screen_width=screen_width,
|
screen_width=screen_width,
|
||||||
screen_height=screen_height,
|
screen_height=screen_height,
|
||||||
@@ -128,6 +174,8 @@ def process_task(task_info,
|
|||||||
truncate_history_inputs=cua_max_steps + 1,
|
truncate_history_inputs=cua_max_steps + 1,
|
||||||
cua_max_steps=cua_max_steps,
|
cua_max_steps=cua_max_steps,
|
||||||
coding_max_steps=coding_max_steps,
|
coding_max_steps=coding_max_steps,
|
||||||
|
region=region,
|
||||||
|
client_password=client_password
|
||||||
)
|
)
|
||||||
|
|
||||||
obs = orchestrator_proxy.reset(task_config=task_config)
|
obs = orchestrator_proxy.reset(task_config=task_config)
|
||||||
@@ -237,6 +285,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# Create a partial function with fixed config_path, model and debug
|
# Create a partial function with fixed config_path, model and debug
|
||||||
process_func = partial(process_task,
|
process_func = partial(process_task,
|
||||||
|
provider_name=args.provider_name,
|
||||||
path_to_vm=args.path_to_vm,
|
path_to_vm=args.path_to_vm,
|
||||||
save_dir=args.result_dir,
|
save_dir=args.result_dir,
|
||||||
coding_model=args.coding_model,
|
coding_model=args.coding_model,
|
||||||
@@ -248,7 +297,10 @@ if __name__ == "__main__":
|
|||||||
cut_off_steps=args.cut_off_steps,
|
cut_off_steps=args.cut_off_steps,
|
||||||
screen_width=args.screen_width,
|
screen_width=args.screen_width,
|
||||||
screen_height=args.screen_height,
|
screen_height=args.screen_height,
|
||||||
sleep_after_execution=args.sleep_after_execution)
|
sleep_after_execution=args.sleep_after_execution,
|
||||||
|
region=args.region,
|
||||||
|
client_password=args.client_password
|
||||||
|
)
|
||||||
|
|
||||||
# Process tasks in parallel
|
# Process tasks in parallel
|
||||||
with Pool(processes=num_workers) as pool:
|
with Pool(processes=num_workers) as pool:
|
||||||
|
|||||||
Reference in New Issue
Block a user