feat: refactor run_multienv_qwen25vl.py and qwen25vl_agent.py for improved logging and task management

- Introduced signal handling for graceful shutdown of environments and processes (see the first sketch after this list).
- Enhanced logging configuration to support dynamic log levels and structured output (see the second sketch after this list).
- Updated argument parsing to include new parameters for model selection and task execution.
- Refactored task distribution logic to streamline environment task management.
- Improved error handling during task execution and environment cleanup.
- Adjusted Qwen25VLAgent initialization to support new model and thought prefix options.
- Reduced max tries for LLM calls from 10 to 5 so persistent failures surface sooner.
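
The graceful-shutdown change follows the standard `signal`-module pattern: register a handler for SIGINT/SIGTERM that closes every live environment before exiting. A minimal sketch, assuming a hypothetical `active_envs` registry (the real script tracks its own environments and worker processes):

import signal
import sys

active_envs = []  # hypothetical registry; the real script keeps its own list

def shutdown_handler(signum, frame):
    # Close every environment so VMs and subprocesses are not leaked.
    print(f"Received signal {signum}, shutting down environments...")
    for env in active_envs:
        try:
            env.close()
        except Exception as e:
            print(f"Failed to close environment: {e}")
    sys.exit(0)

signal.signal(signal.SIGINT, shutdown_handler)   # Ctrl+C
signal.signal(signal.SIGTERM, shutdown_handler)  # kill <pid>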
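Likewise, dynamic log levels with structured output need nothing beyond the standard library; a sketch under the assumption that the level comes from an environment variable (the script's actual flag or variable name may differ):

import logging
import os

def configure_logging():
    # Pick the level at runtime instead of hardcoding it.
    level = os.getenv("LOG_LEVEL", "INFO").upper()
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter(
        "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
    ))
    logging.basicConfig(level=getattr(logging, level, logging.INFO), handlers=[handler])

configure_logging()
logger = logging.getLogger("run_multienv_qwen25vl")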
Author: yuanmengqi
Date: 2025-07-22 19:46:42 +00:00
Parent: 4a5d48000f
Commit: 82c3cdd590
2 changed files with 383 additions and 207 deletions


@@ -66,25 +66,24 @@ class Qwen25VLAgent:
     def __init__(
         self,
         platform="ubuntu",
-        planner_model="gpt-4o",
-        executor_model="qwen2.5vl",
+        model="qwen2.5-vl-72b-instruct",
         max_tokens=1500,
         top_p=0.9,
         temperature=0.5,
         action_space="pyautogui",
         observation_type="screenshot",
         history_n=4,  # Number of previous interactions to include in full detail
+        add_thought_prefix=False,
     ):
         self.platform = platform
-        self.planner_model = planner_model
-        self.executor_model = executor_model
-        assert self.executor_model is not None, "Executor model cannot be None"
+        self.model = model
         self.max_tokens = max_tokens
         self.top_p = top_p
         self.temperature = temperature
         self.action_space = action_space
         self.observation_type = observation_type
         self.history_n = history_n  # Control how many previous interactions to include
+        self.add_thought_prefix = add_thought_prefix
         assert action_space in ["pyautogui"], "Invalid action space"
         assert observation_type in ["screenshot"], "Invalid observation type"
         self.thoughts = []
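
With this hunk applied, the agent takes a single model name instead of separate planner/executor models, plus the new prefix flag. A usage sketch (the import path is an assumption):

from qwen25vl_agent import Qwen25VLAgent  # assumed module path

agent = Qwen25VLAgent(
    platform="ubuntu",
    model="qwen2.5-vl-72b-instruct",  # replaces planner_model/executor_model
    add_thought_prefix=False,         # set True to prefill "Thought:" before generation
)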
@@ -277,19 +276,20 @@ Previous actions:
         })
         # append_text = f"""Step {current_step+1}: Thought:"""
-        append_text = f"""Thought:"""
-        messages.append({"role": "assistant", "content": [{"type": "text", "text": append_text}]})
+        if self.add_thought_prefix:
+            append_text = f"""Thought:"""
+            messages.append({"role": "assistant", "content": [{"type": "text", "text": append_text}]})
         # Call the LLM
         response = self.call_llm(
             {
-                "model": self.executor_model,
+                "model": self.model,
                 "messages": messages,
                 "max_tokens": self.max_tokens,
                 "top_p": self.top_p,
                 "temperature": self.temperature,
             },
-            self.executor_model,
+            self.model,
         )
         logger.info(f"Qwen25VL Output: {response}")
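
The `add_thought_prefix` branch is an assistant-prefill technique: a partial assistant turn ending in `Thought:` is appended so the model continues that turn instead of starting a fresh one. Whether a serving stack honors a trailing assistant message as a prefill depends on the backend; schematically, the request ends like this when the flag is set (contents abbreviated):

messages = [
    {"role": "system", "content": [{"type": "text", "text": "..."}]},
    {"role": "user", "content": [{"type": "text", "text": "..."}]},  # plus the screenshot
    # Prefilled partial turn; the completion should continue "Thought: ... Action: ..."
    {"role": "assistant", "content": [{"type": "text", "text": "Thought:"}]},
]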
@@ -483,10 +483,10 @@ Previous actions:
                 continue
             # Handle lines inside tool call markers
-            if line.startswith("<tool_call>"):
+            if line.startswith("<tool_call>") or line.startswith("") or line.startswith("📐"):  # Yeah, it's a bug during data processing
                 inside_tool_call = True
                 continue
-            elif line.startswith("</tool_call>"):
+            elif line.startswith("</tool_call>") or line.startswith("") or line.startswith("📐"):  # Yeah, it's a bug during data processing
                 if current_tool_call:
                     # Process the collected tool call
                     process_tool_call("\n".join(current_tool_call))
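
For context, the surrounding parser walks the response line by line and buffers everything between `<tool_call>` markers; a simplified, self-contained sketch of that loop (the extra marker checks above work around stray tokens left over from data processing):

def extract_tool_calls(response):
    # Collect the text between <tool_call> ... </tool_call> marker lines.
    tool_calls, current_tool_call = [], []
    inside_tool_call = False
    for line in response.splitlines():
        line = line.strip()
        if line.startswith("<tool_call>"):
            inside_tool_call = True
        elif line.startswith("</tool_call>"):
            if current_tool_call:
                tool_calls.append("\n".join(current_tool_call))
                current_tool_call = []
            inside_tool_call = False
        elif inside_tool_call:
            current_tool_call.append(line)
    return tool_calls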
@@ -540,12 +540,13 @@ Previous actions:
             # todo: check
         ),
         interval=30,
-        max_tries=10,
+        max_tries=5,
     )
     def call_llm(self, payload, model):
         messages = payload["messages"]
-        base_url = "your_base_url"
-        api_key = "your_api_key"
+        base_url = os.getenv('DASHSCOPE_BASE_URL', "https://dashscope.aliyuncs.com/compatible-mode/v1")
+        api_key = os.getenv('DASHSCOPE_API_KEY', "sk-123")
         client = openai.OpenAI(
             base_url=base_url,
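
The decorator tail above (interval=30, max_tries=5) matches the signature of `backoff.on_exception` with `backoff.constant`, though that library choice is an assumption here, and the exception tuple from the real code is elided to a bare `Exception`; the `os.getenv` change also presumes an `import os` added elsewhere in the commit. A sketch of the whole retry-plus-credentials pattern:

import os

import backoff  # assumed; any retry decorator with the same kwargs would fit
import openai

@backoff.on_exception(backoff.constant, Exception, interval=30, max_tries=5)
def call_llm(payload):
    # Credentials come from the environment now; export DASHSCOPE_API_KEY
    # before running rather than relying on the "sk-123" placeholder.
    client = openai.OpenAI(
        base_url=os.getenv("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
        api_key=os.getenv("DASHSCOPE_API_KEY", "sk-123"),
    )
    response = client.chat.completions.create(
        model=payload["model"],
        messages=payload["messages"],
        max_tokens=payload["max_tokens"],
        top_p=payload["top_p"],
        temperature=payload["temperature"],
    )
    return response.choices[0].message.content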