fix chrome 2888b4e6-5b47-4b57-8bf5-c73827890774

This commit is contained in:
yuanmengqi
2025-07-04 07:14:54 +00:00
parent 3d7be9f216
commit 66e669b50b
2 changed files with 19 additions and 14 deletions

View File

@@ -1485,7 +1485,7 @@ def get_url_path_parse(env, config: Dict[str, str]):
""" """
Parse Macy's product url path, extract: Parse Macy's product url path, extract:
- mens_clothing: true if 'mens-clothing' in path, else None - mens_clothing: true if 'mens-clothing' in path, else None
- t_shirts: true if any key 'Top_style' or 'Product_department' value is 'T-shirts', else None - shirts: true if any key 'Top_style' or 'Product_department' value is 'shirts', else None
- Men_regular_size_t, Price_discount_range (as list), Sleeve_length: as before, None if not found - Men_regular_size_t, Price_discount_range (as list), Sleeve_length: as before, None if not found
All fields are None if not found for robustness. All fields are None if not found for robustness.
""" """
@@ -1503,9 +1503,11 @@ def get_url_path_parse(env, config: Dict[str, str]):
# key-value # key-value
path_parts = path.strip('/').split('/') path_parts = path.strip('/').split('/')
key_value_json = {} key_value_json = {}
tshirts_flag = False shirts_flag = False
if "mens-t-shirts" in path: if "shirts" in path:
tshirts_flag = True shirts_flag = True
if "short-sleeve" in path:
short_sleeve_flag = True
for i in range(len(path_parts)-1): for i in range(len(path_parts)-1):
if ',' in path_parts[i] and ',' in path_parts[i+1]: if ',' in path_parts[i] and ',' in path_parts[i+1]:
keys = [k.strip() for k in path_parts[i].split(',')] keys = [k.strip() for k in path_parts[i].split(',')]
@@ -1515,13 +1517,16 @@ def get_url_path_parse(env, config: Dict[str, str]):
key_value_json[k] = [item.strip() for item in v.split('|')] if v else None key_value_json[k] = [item.strip() for item in v.split('|')] if v else None
else: else:
key_value_json[k] = v if v else None key_value_json[k] = v if v else None
if (k == 'Top_style' or k == 'Product_department') and (v == 'T-shirts' or v == 'T-Shirts' or v == 'T-Shirt'): if k == 'Product_department' and (v == 'shirts' or v == 'Shirts' or v == 'Shirt'):
tshirts_flag = True shirts_flag = True
if k == 'Sleeve_length' and (v == 'short-sleeve' or v == 'Short Sleeve'):
short_sleeve_flag = True
break break
for field in ['Men_regular_size_t', 'Price_discount_range', 'Sleeve_length']: for field in ['Men_regular_size_t', 'Price_discount_range']:
if field not in key_value_json: if field not in key_value_json:
key_value_json[field] = None key_value_json[field] = None
result['t_shirts'] = tshirts_flag if tshirts_flag else None result['shirts'] = shirts_flag if shirts_flag else None
result['short_sleeve'] = short_sleeve_flag if short_sleeve_flag else None
# parse_keys # parse_keys
for key in config["parse_keys"]: for key in config["parse_keys"]:
if key in key_value_json: if key in key_value_json:

View File

@@ -1,7 +1,7 @@
{ {
"id": "2888b4e6-5b47-4b57-8bf5-c73827890774", "id": "2888b4e6-5b47-4b57-8bf5-c73827890774",
"snapshot": "chrome", "snapshot": "chrome",
"instruction": "Show me all men's large-size short-sleeve T-shirts with a discount of 50% or more.", "instruction": "Show me all men's large-size short-sleeve shirts with a discount of 50% or more.",
"source": "test_task_1", "source": "test_task_1",
"config": [ "config": [
{ {
@@ -49,10 +49,10 @@
"goto_prefix": "https://www.", "goto_prefix": "https://www.",
"parse_keys": [ "parse_keys": [
"mens_clothing", "mens_clothing",
"t_shirts", "shirts",
"Men_regular_size_t", "Men_regular_size_t",
"Price_discount_range", "Price_discount_range",
"Sleeve_length" "short_sleeve"
] ]
}, },
"expected": { "expected": {
@@ -60,13 +60,13 @@
"rules": { "rules": {
"expected": { "expected": {
"mens_clothing": true, "mens_clothing": true,
"t_shirts": true, "shirts": true,
"Men_regular_size_t": "L", "Men_regular_size_t": "L",
"Price_discount_range": "50_PERCENT_ off & more", "Price_discount_range": "50_PERCENT_ off & more",
"Sleeve_length": "Short Sleeve" "short_sleeve": true
} }
} }
} }
}, },
"proxy": true "proxy": false
} }