Increase timeout for page load stability in Chrome evaluator

- Increased the `networkidle` page-load wait timeout from 10 seconds (10000 ms) to 60 seconds (60000 ms) so slower pages can finish loading before their URL is inspected.
- Removed the redundant HTTP-status retry loops (with page-refresh pseudocode and `time.sleep` delays) from the active-tab check functions to streamline the code while preserving the existing matching behavior.
- Enhanced logging to provide clearer insights into the page loading process.

These changes aim to improve the reliability of the Chrome evaluator without altering the core logic.
This commit is contained in:
yuanmengqi
2025-07-18 14:16:16 +00:00
parent fcaefe7bb4
commit 44bd66fc9a
2 changed files with 14 additions and 63 deletions

View File

@@ -1523,7 +1523,7 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
for page in context.pages:
try:
# Wait for page to be stable before checking URL
page.wait_for_load_state("networkidle", timeout=10000)
page.wait_for_load_state("networkidle", timeout=60000)
# Check if page is still valid before accessing properties
if page.is_closed():

View File

@@ -3,6 +3,7 @@ import os
import re
import shutil
import io
import time
from itertools import product
from typing import Any, Dict, List, Union
@@ -21,30 +22,6 @@ def is_expected_active_tab(active_tab_info: Dict[str, str], rule: Dict[str, Any]
if not active_tab_info:
return 0.
# 添加重试机制
max_retries = 3
retry_delay = 2 # seconds
for attempt in range(max_retries):
# 添加HTTP状态码检查
if 'status' in active_tab_info and active_tab_info['status'] >= 400:
logger.warning(f"Page load failed (attempt {attempt+1}/{max_retries}), HTTP status: {active_tab_info['status']}")
if attempt < max_retries - 1:
# 重试前刷新页面
logger.info(f"Refreshing page and retrying in {retry_delay} seconds...")
time.sleep(retry_delay)
# 这里需要调用刷新页面的函数(实际实现取决于您的环境)
# 伪代码: refresh_active_tab()
# 然后重新获取 active_tab_info
# 伪代码: active_tab_info = get_active_tab_info()
continue
else:
logger.error(f"Page load failed after {max_retries} attempts")
return 0.
break # 如果状态码正常,跳出重试循环
match_type = rule['type']
if match_type == "url":
@@ -68,26 +45,6 @@ def is_expected_active_tab_approximate(active_tab_info: Dict[str, str], rule: Di
if not active_tab_info:
return 0.
# 添加相同的重试机制
max_retries = 3
retry_delay = 2 # seconds
for attempt in range(max_retries):
if 'status' in active_tab_info and active_tab_info['status'] >= 400:
logger.warning(f"Page load failed (attempt {attempt+1}/{max_retries}), HTTP status: {active_tab_info['status']}")
if attempt < max_retries - 1:
logger.info(f"Refreshing page and retrying in {retry_delay} seconds...")
time.sleep(retry_delay)
# 伪代码: refresh_active_tab()
# 伪代码: active_tab_info = get_active_tab_info()
continue
else:
logger.error(f"Page load failed after {max_retries} attempts")
return 0.
break
match_type = rule['type']
if match_type == "url":
@@ -118,25 +75,19 @@ def is_expected_url_pattern_match(result, rules) -> float:
if not result:
return 0.
# 添加相同的重试机制
max_retries = 3
retry_delay = 2 # seconds
for attempt in range(max_retries):
if isinstance(result, dict) and 'status' in result and result['status'] >= 400:
logger.warning(f"Page load failed (attempt {attempt+1}/{max_retries}), HTTP status: {result['status']}")
if attempt < max_retries - 1:
logger.info(f"Refreshing page and retrying in {retry_delay} seconds...")
time.sleep(retry_delay)
# 伪代码: refresh_active_tab()
# 伪代码: result = get_active_tab_info()
continue
else:
logger.error(f"Page load failed after {max_retries} attempts")
return 0.
# Extract URL from result parameter - result can be either a string URL or a dict with 'url' field
if isinstance(result, str):
result_url = result
logger.info("result url: {}".format(result_url))
elif isinstance(result, dict) and 'url' in result:
result_url = result['url']
logger.info("result url: {}".format(result_url))
else:
logger.error(f"Invalid result format: {type(result)}, expected string URL or dict with 'url' field")
return 0.
break
logger.info(f"Result URL to match: {result_url}")
# expect_regex = re.compile(rules["expected"])
patterns = rules["expected"]
logger.info("expected_regex: {}".format(patterns))