Improve the parallel logic

yuanmengqi
2025-07-17 04:14:20 +00:00
parent cb070307ee
commit 9eeabfc52d
3 changed files with 631 additions and 101 deletions
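
In short, the parallel logic changes from pre-splitting tasks per environment to a single shared task queue drained by a fixed pool of worker processes, with the main process restarting any worker that dies before the queue is empty. Below is a minimal, standalone sketch of that pattern; the task tuples, NUM_WORKERS, and the worker body are illustrative placeholders, not code from this commit.

# Minimal sketch of the queue-based worker pattern (placeholders, not repo code).
from multiprocessing import Manager, Process, current_process
import queue
import time

NUM_WORKERS = 4  # stands in for args.num_envs


def worker(task_queue, shared_scores):
    """Pull (domain, example_id) tasks until the queue stays empty."""
    while True:
        try:
            domain, example_id = task_queue.get(timeout=5)
        except queue.Empty:
            break  # no work left for this worker
        # ... a real worker would drive an env/agent for this task ...
        print(f"[{current_process().name}] running {domain}/{example_id}")
        shared_scores.append(1.0)  # placeholder score


if __name__ == "__main__":
    tasks = [("chrome", "task_001"), ("gimp", "task_002"), ("os", "task_003")]
    with Manager() as manager:
        shared_scores = manager.list()
        task_queue = manager.Queue()
        for t in tasks:
            task_queue.put(t)

        workers = [
            Process(target=worker, args=(task_queue, shared_scores), daemon=True)
            for _ in range(NUM_WORKERS)
        ]
        for p in workers:
            p.start()

        # Supervise: restart dead workers while tasks remain, then join.
        while not task_queue.empty():
            for i, p in enumerate(workers):
                if not p.is_alive():
                    workers[i] = Process(target=worker,
                                         args=(task_queue, shared_scores),
                                         daemon=True)
                    workers[i].start()
            time.sleep(1)
        for p in workers:
            p.join()

        scores = list(shared_scores)
        print(f"Average score: {sum(scores) / len(scores) if scores else 0}")

The key design choice mirrored here is that workers exit on a queue-get timeout rather than a sentinel value, so a restarted worker can rejoin the pool without any handshaking.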


@@ -11,6 +11,7 @@ from typing import List, Dict
import math
from tqdm import tqdm
from multiprocessing import Process, Manager
from multiprocessing import current_process, Queue
import lib_run_single
from desktop_env.desktop_env import DesktopEnv
from mm_agents.openai_cua_agent import OpenAICUAAgent
@@ -130,32 +131,12 @@ logger.addHandler(stdout_handler)
logger = logging.getLogger("desktopenv.experiment")
def distribute_tasks(test_all_meta: dict, num_envs: int) -> List[Dict]:
"""Distribute tasks evenly across environments."""
# Flatten the tasks into a single list
def distribute_tasks(test_all_meta: dict) -> List[tuple]:
all_tasks = []
for domain, examples in test_all_meta.items():
for example_id in examples:
all_tasks.append((domain, example_id))
# Calculate tasks per environment
tasks_per_env = math.ceil(len(all_tasks) / num_envs)
# Distribute tasks
distributed_tasks = []
for i in range(num_envs):
env_tasks = {}
start_idx = i * tasks_per_env
end_idx = min((i + 1) * tasks_per_env, len(all_tasks))
for domain, example_id in all_tasks[start_idx:end_idx]:
if domain not in env_tasks:
env_tasks[domain] = []
env_tasks[domain].append(example_id)
distributed_tasks.append(env_tasks)
return distributed_tasks
return all_tasks
def process_signal_handler(signum, frame, env_idx):
@@ -180,63 +161,58 @@ def process_signal_handler(signum, frame, env_idx):
sys.exit(0)
def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, shared_scores: list):
"""Run tasks for a single environment."""
# Each process has its own list of active environments
def run_env_tasks(task_queue: Queue, args: argparse.Namespace, shared_scores: list):
active_environments = []
env = None
# Setup signal handlers for this process too
signal.signal(signal.SIGINT, lambda signum, frame: process_signal_handler(signum, frame, env_idx))
signal.signal(signal.SIGTERM, lambda signum, frame: process_signal_handler(signum, frame, env_idx))
from desktop_env.providers.aws.manager import IMAGE_ID_MAP
REGION = args.region
screen_size = (args.screen_width, args.screen_height)
ami_id = IMAGE_ID_MAP[REGION].get(screen_size, IMAGE_ID_MAP[REGION][(1920, 1080)])
env = DesktopEnv(
path_to_vm=args.path_to_vm,
action_space=args.action_space,
provider_name=args.provider_name,
region=REGION,
snapshot_name=ami_id,
screen_size=screen_size,
headless=args.headless,
os_type="Ubuntu",
require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
enable_proxy=True,
client_password=args.client_password
)
active_environments.append(env)
agent = OpenAICUAAgent(
env=env,
model=args.model,
max_tokens=args.max_tokens,
top_p=args.top_p,
temperature=args.temperature,
action_space=args.action_space,
observation_type=args.observation_type,
max_trajectory_length=args.max_trajectory_length,
client_password=args.client_password,
provider_name=args.provider_name,
screen_width=args.screen_width,
screen_height=args.screen_height
)
logger.info(f"Executing tasks in environment {env_idx + 1}/{args.num_envs}")
try:
for domain in tqdm(env_tasks, desc=f"Env{env_idx+1}-Domain"):
for example_id in tqdm(env_tasks[domain], desc="Example", leave=False):
from desktop_env.providers.aws.manager import IMAGE_ID_MAP
REGION = args.region
screen_size = (args.screen_width, args.screen_height)
ami_id = IMAGE_ID_MAP[REGION].get(screen_size, IMAGE_ID_MAP[REGION][(1920, 1080)])
env = DesktopEnv(
path_to_vm=args.path_to_vm,
action_space=args.action_space,
provider_name=args.provider_name,
region=REGION,
snapshot_name=ami_id,
screen_size=screen_size,
headless=args.headless,
os_type="Ubuntu",
require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
enable_proxy=True,
client_password=args.client_password
)
active_environments.append(env)
agent = OpenAICUAAgent(
env=env,
model=args.model,
max_tokens=args.max_tokens,
top_p=args.top_p,
temperature=args.temperature,
action_space=args.action_space,
observation_type=args.observation_type,
max_trajectory_length=args.max_trajectory_length,
client_password=args.client_password,
provider_name=args.provider_name,
screen_width=args.screen_width,
screen_height=args.screen_height
)
logger.info(f"Process {current_process().name} started.")
while True:
try:
item = task_queue.get(timeout=5)
except Exception:
break
domain, example_id = item
try:
config_file = os.path.join(
args.test_config_base_dir, f"examples/{domain}/{example_id}.json"
)
with open(config_file, "r", encoding="utf-8") as f:
example = json.load(f)
logger.info(f"[Env {env_idx+1}][Domain]: {domain}")
logger.info(f"[Env {env_idx+1}][Example ID]: {example_id}")
logger.info(f"[Env {env_idx+1}][Instruction]: {example['instruction']}")
logger.info(f"[{current_process().name}][Domain]: {domain}")
logger.info(f"[{current_process().name}][Example ID]: {example_id}")
logger.info(f"[{current_process().name}][Instruction]: {example['instruction']}")
example_result_dir = os.path.join(
args.result_dir,
args.action_space,
@@ -246,7 +222,6 @@ def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, share
example_id,
)
os.makedirs(example_result_dir, exist_ok=True)
try:
lib_run_single.run_single_example_openaicua(
agent,
@@ -260,7 +235,7 @@ def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, share
)
except Exception as e:
import traceback
logger.error(f"Exception in Env{env_idx+1} {domain}/{example_id}: {e}")
logger.error(f"Exception in {current_process().name} {domain}/{example_id}: {e}")
logger.error(traceback.format_exc())
try:
env.controller.end_recording(
@@ -268,7 +243,6 @@ def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, share
)
except Exception as rec_e:
logger.error(f"Failed to end recording: {rec_e}")
with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
f.write(
json.dumps(
@@ -276,14 +250,22 @@ def run_env_tasks(env_idx: int, env_tasks: dict, args: argparse.Namespace, share
)
)
f.write("\n")
except Exception as e:
logger.error(f"Task-level error in {current_process().name}: {e}")
import traceback
logger.error(traceback.format_exc())
except Exception as e:
logger.error(f"Process-level error in {current_process().name}: {e}")
import traceback
logger.error(traceback.format_exc())
finally:
# This ensures the environment is closed even if there's an exception
logger.info(f"Process {env_idx + 1} cleaning up environment...")
logger.info(f"{current_process().name} cleaning up environment...")
try:
env.close()
logger.info(f"Process {env_idx + 1} environment closed successfully")
if env:
env.close()
logger.info(f"{current_process().name} environment closed successfully")
except Exception as e:
logger.error(f"Process {env_idx + 1} error during environment cleanup: {e}")
logger.error(f"{current_process().name} error during environment cleanup: {e}")
def signal_handler(signum, frame):
@@ -323,8 +305,8 @@ def signal_handler(signum, frame):
if p.is_alive():
try:
logger.info(f"Forcefully terminating process {p.name}...")
import signal
os.kill(p.pid, signal.SIGKILL)
import signal as sig
os.kill(p.pid, sig.SIGKILL)
except Exception as e:
logger.error(f"Error forcefully terminating process: {e}")
@@ -335,38 +317,56 @@ def signal_handler(signum, frame):
def test(args: argparse.Namespace, test_all_meta: dict) -> None:
global processes
logger.info("Args: %s", args)
distributed_tasks = distribute_tasks(test_all_meta, args.num_envs)
logger.info("All environments are ready. Starting parallel task execution...")
# Create a shared list for scores across processes
all_tasks = distribute_tasks(test_all_meta)
logger.info(f"Total tasks: {len(all_tasks)}")
with Manager() as manager:
shared_scores = manager.list()
# Create and start processes for each environment
task_queue = manager.Queue()
for item in all_tasks:
task_queue.put(item)
num_envs = args.num_envs
processes = []
for env_idx, env_tasks in enumerate(distributed_tasks):
for i in range(num_envs):
p = Process(
target=run_env_tasks,
args=(env_idx, env_tasks, args, shared_scores)
args=(task_queue, args, shared_scores),
name=f"EnvProcess-{i+1}"
)
processes.append(p)
p.daemon = True
p.start()
processes.append(p)
logger.info(f"Started process {p.name} with PID {p.pid}")
try:
# Wait for all processes to complete
while True:
alive_count = 0
for idx, p in enumerate(processes):
if not p.is_alive():
logger.warning(f"Process {p.name} died, restarting...")
new_p = Process(
target=run_env_tasks,
args=(task_queue, args, shared_scores),
name=f"EnvProcess-Restart-{idx+1}"
)
new_p.daemon = True
new_p.start()
processes[idx] = new_p
logger.info(f"Restarted process {new_p.name} with PID {new_p.pid}")
else:
alive_count += 1
if task_queue.empty():
logger.info("All tasks finished.")
break
if alive_count == 0:
logger.error("All processes died, exiting.")
break
time.sleep(5)
for p in processes:
p.join()
logger.info(f"Process {p.name} completed")
except KeyboardInterrupt:
logger.info("Main process received KeyboardInterrupt. Initiating graceful shutdown...")
# Let the signal handler do the cleanup
raise
except Exception as e:
logger.error(f"Unexpected error while waiting for processes: {e}", exc_info=True)
# Ensure cleanup happens
for p in processes:
if p.is_alive():
try:
@@ -375,10 +375,7 @@ def test(args: argparse.Namespace, test_all_meta: dict) -> None:
except Exception as term_e:
logger.error(f"Error terminating process {p.name}: {term_e}")
raise
# Convert shared list to regular list
scores = list(shared_scores)
logger.info(f"Average score: {sum(scores) / len(scores) if scores else 0}")