Increase timeout for page load stability in Chrome evaluator
- Updated the timeout for the page load state from 10 seconds to 60 seconds to ensure better stability during page processing. - Removed redundant retry mechanisms from the active tab checks to streamline the code while maintaining existing functionality. - Enhanced logging to provide clearer insights into the page loading process. These changes aim to improve the reliability of the Chrome evaluator without altering the core logic.
This commit is contained in:
@@ -1523,7 +1523,7 @@ def get_active_tab_html_parse(env, config: Dict[str, Any]):
|
||||
for page in context.pages:
|
||||
try:
|
||||
# Wait for page to be stable before checking URL
|
||||
page.wait_for_load_state("networkidle", timeout=10000)
|
||||
page.wait_for_load_state("networkidle", timeout=60000)
|
||||
|
||||
# Check if page is still valid before accessing properties
|
||||
if page.is_closed():
|
||||
|
||||
@@ -3,6 +3,7 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
import io
|
||||
import time
|
||||
from itertools import product
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
@@ -21,30 +22,6 @@ def is_expected_active_tab(active_tab_info: Dict[str, str], rule: Dict[str, Any]
|
||||
if not active_tab_info:
|
||||
return 0.
|
||||
|
||||
# 添加重试机制
|
||||
max_retries = 3
|
||||
retry_delay = 2 # seconds
|
||||
|
||||
for attempt in range(max_retries):
|
||||
# 添加HTTP状态码检查
|
||||
if 'status' in active_tab_info and active_tab_info['status'] >= 400:
|
||||
logger.warning(f"Page load failed (attempt {attempt+1}/{max_retries}), HTTP status: {active_tab_info['status']}")
|
||||
|
||||
if attempt < max_retries - 1:
|
||||
# 重试前刷新页面
|
||||
logger.info(f"Refreshing page and retrying in {retry_delay} seconds...")
|
||||
time.sleep(retry_delay)
|
||||
# 这里需要调用刷新页面的函数(实际实现取决于您的环境)
|
||||
# 伪代码: refresh_active_tab()
|
||||
# 然后重新获取 active_tab_info
|
||||
# 伪代码: active_tab_info = get_active_tab_info()
|
||||
continue
|
||||
else:
|
||||
logger.error(f"Page load failed after {max_retries} attempts")
|
||||
return 0.
|
||||
|
||||
break # 如果状态码正常,跳出重试循环
|
||||
|
||||
match_type = rule['type']
|
||||
|
||||
if match_type == "url":
|
||||
@@ -68,26 +45,6 @@ def is_expected_active_tab_approximate(active_tab_info: Dict[str, str], rule: Di
|
||||
if not active_tab_info:
|
||||
return 0.
|
||||
|
||||
# 添加相同的重试机制
|
||||
max_retries = 3
|
||||
retry_delay = 2 # seconds
|
||||
|
||||
for attempt in range(max_retries):
|
||||
if 'status' in active_tab_info and active_tab_info['status'] >= 400:
|
||||
logger.warning(f"Page load failed (attempt {attempt+1}/{max_retries}), HTTP status: {active_tab_info['status']}")
|
||||
|
||||
if attempt < max_retries - 1:
|
||||
logger.info(f"Refreshing page and retrying in {retry_delay} seconds...")
|
||||
time.sleep(retry_delay)
|
||||
# 伪代码: refresh_active_tab()
|
||||
# 伪代码: active_tab_info = get_active_tab_info()
|
||||
continue
|
||||
else:
|
||||
logger.error(f"Page load failed after {max_retries} attempts")
|
||||
return 0.
|
||||
|
||||
break
|
||||
|
||||
match_type = rule['type']
|
||||
|
||||
if match_type == "url":
|
||||
@@ -118,25 +75,19 @@ def is_expected_url_pattern_match(result, rules) -> float:
|
||||
if not result:
|
||||
return 0.
|
||||
|
||||
# 添加相同的重试机制
|
||||
max_retries = 3
|
||||
retry_delay = 2 # seconds
|
||||
|
||||
for attempt in range(max_retries):
|
||||
if isinstance(result, dict) and 'status' in result and result['status'] >= 400:
|
||||
logger.warning(f"Page load failed (attempt {attempt+1}/{max_retries}), HTTP status: {result['status']}")
|
||||
|
||||
if attempt < max_retries - 1:
|
||||
logger.info(f"Refreshing page and retrying in {retry_delay} seconds...")
|
||||
time.sleep(retry_delay)
|
||||
# 伪代码: refresh_active_tab()
|
||||
# 伪代码: result = get_active_tab_info()
|
||||
continue
|
||||
else:
|
||||
logger.error(f"Page load failed after {max_retries} attempts")
|
||||
return 0.
|
||||
# Extract URL from result parameter - result can be either a string URL or a dict with 'url' field
|
||||
if isinstance(result, str):
|
||||
result_url = result
|
||||
logger.info("result url: {}".format(result_url))
|
||||
elif isinstance(result, dict) and 'url' in result:
|
||||
result_url = result['url']
|
||||
logger.info("result url: {}".format(result_url))
|
||||
else:
|
||||
logger.error(f"Invalid result format: {type(result)}, expected string URL or dict with 'url' field")
|
||||
return 0.
|
||||
|
||||
break
|
||||
logger.info(f"Result URL to match: {result_url}")
|
||||
|
||||
# expect_regex = re.compile(rules["expected"])
|
||||
patterns = rules["expected"]
|
||||
logger.info("expected_regex: {}".format(patterns))
|
||||
|
||||
Reference in New Issue
Block a user