Support Aliyun evaluation of Qwen3-VL (configurable API endpoints/keys via environment variables, adjusted max_tokens, and "aliyun" provider choice)

This commit is contained in:
ludunjie.ldj
2025-10-16 16:20:54 +08:00
parent 55372c4432
commit afd29115da
2 changed files with 9 additions and 9 deletions

View File

@@ -61,7 +61,7 @@ class Qwen3VLAgent:
self, self,
platform: str = "ubuntu", platform: str = "ubuntu",
model: str = "qwen3-vl", model: str = "qwen3-vl",
max_tokens: int = 40960, max_tokens: int = 32768,
top_p: float = 0.9, top_p: float = 0.9,
temperature: float = 0.0, temperature: float = 0.0,
action_space: str = "pyautogui", action_space: str = "pyautogui",
@@ -70,7 +70,7 @@ class Qwen3VLAgent:
add_thought_prefix: bool = False, add_thought_prefix: bool = False,
coordinate_type: str = "relative", coordinate_type: str = "relative",
api_backend: str = "dashscope", # "openai" or "dashscope" api_backend: str = "dashscope", # "openai" or "dashscope"
enable_thinking: bool = True, # Enable thinking mode for DashScope enable_thinking: bool = False, # Enable thinking mode for DashScope
thinking_budget: int = 32768, # Token budget for reasoning thinking_budget: int = 32768, # Token budget for reasoning
): ):
self.platform = platform self.platform = platform
@@ -628,8 +628,8 @@ Previous actions:
def _call_llm_openai(self, messages, model): def _call_llm_openai(self, messages, model):
"""Call LLM using OpenAI SDK (compatible with OpenAI-compatible endpoints).""" """Call LLM using OpenAI SDK (compatible with OpenAI-compatible endpoints)."""
base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1" base_url = os.environ.get("OPENAI_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
api_key = "sk-123" api_key = os.environ.get("OPENAI_API_KEY", "sk-123")
client = openai.OpenAI(base_url=base_url, api_key=api_key) client = openai.OpenAI(base_url=base_url, api_key=api_key)
for attempt in range(1, MAX_RETRY_TIMES + 1): for attempt in range(1, MAX_RETRY_TIMES + 1):
@@ -653,8 +653,8 @@ Previous actions:
def _call_llm_dashscope(self, messages, model): def _call_llm_dashscope(self, messages, model):
"""Call LLM using DashScope SDK.""" """Call LLM using DashScope SDK."""
dashscope.base_http_api_url = "https://dashscope.aliyuncs.com/api/v1" dashscope.base_http_api_url = os.environ.get("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/api/v1")
dashscope.api_key = "sk-123" dashscope.api_key = os.environ.get("DASHSCOPE_API_KEY", "sk-123")
# Convert message schema # Convert message schema
ds_messages = self._to_dashscope_messages(messages) ds_messages = self._to_dashscope_messages(messages)
@@ -669,7 +669,7 @@ Previous actions:
call_params = { call_params = {
"model": model, "model": model,
"messages": ds_messages, "messages": ds_messages,
"max_tokens": min(self.max_tokens, 2048), "max_tokens": self.max_tokens,
# "temperature": self.temperature, # "temperature": self.temperature,
# "top_p": self.top_p, # "top_p": self.top_p,
"vl_high_resolution_images": True, "vl_high_resolution_images": True,

View File

@@ -57,7 +57,7 @@ def config() -> argparse.Namespace:
parser.add_argument("--model", type=str, default="qwen3-vl") parser.add_argument("--model", type=str, default="qwen3-vl")
parser.add_argument("--temperature", type=float, default=0) parser.add_argument("--temperature", type=float, default=0)
parser.add_argument("--top_p", type=float, default=0.9) parser.add_argument("--top_p", type=float, default=0.9)
parser.add_argument("--max_tokens", type=int, default=40960) parser.add_argument("--max_tokens", type=int, default=32768)
parser.add_argument("--stop_token", type=str, default=None) parser.add_argument("--stop_token", type=str, default=None)
parser.add_argument( parser.add_argument(
"--coord", "--coord",
@@ -99,7 +99,7 @@ def config() -> argparse.Namespace:
"--provider_name", "--provider_name",
type=str, type=str,
default="docker", default="docker",
choices=["aws", "virtualbox", "vmware", "docker", "azure"], choices=["aws", "virtualbox", "vmware", "docker", "azure", "aliyun"],
help="Provider name", help="Provider name",
) )
parser.add_argument( parser.add_argument(