From 1d10514125afebd0e78f2a9629d8bfc8c27f7278 Mon Sep 17 00:00:00 2001
From: Xubin Ren <52506698+Re-bin@users.noreply.github.com>
Date: Thu, 10 Apr 2025 17:24:50 +0800
Subject: [PATCH] Fix Search Engine Detection Discrepancy in Chrome Evaluation (#172)

* Update bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json

* Update __init__.py

* Update general.py
---
 desktop_env/evaluators/metrics/__init__.py            |  1 +
 desktop_env/evaluators/metrics/general.py             | 14 +++++++++++++-
 .../chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json  |  4 ++--
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py
index d4cc42e..19a450d 100644
--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -61,6 +61,7 @@ from .general import (
     check_json,
     check_list,
     exact_match,
+    match_in_list,
     is_in_list,
     fuzzy_match,
     check_include_exclude,
diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py
index 77e9e6d..a401b74 100644
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -47,6 +47,18 @@ def exact_match(result, rules) -> float:
     else:
         return 0.
 
+def match_in_list(result, rules) -> float:
+    """Return 1. if `result` equals one of the accepted values, else 0.
+
+    `rules["expected"]` holds the accepted values (e.g. the names under which
+    Chrome may report one search engine). A bare string is treated as a
+    single accepted value so that `in` never becomes a substring test.
+    """
+    expect = rules["expected"]
+    if isinstance(expect, str):
+        expect = [expect]
+
+    return 1. if result in expect else 0.
 
 def literal_match(result: Any, expected: Any, **options) -> float:
     literal_type = options.get('type', 'str')
@@ -495,4 +507,4 @@ if __name__ == '__main__':
             "end": "NYC",
             "time": "{DoW}, {Month} {DayD}, {Year}",
             "category": "Miles"
-        }}))
\ No newline at end of file
+        }}))
diff --git a/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json b/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
index f87b862..51bd752 100644
--- a/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
+++ b/evaluation_examples/examples/chrome/bb5e4c0d-f964-439c-97b6-bdb9747de3f4.json
@@ -29,14 +29,14 @@
     "chrome"
   ],
   "evaluator": {
-    "func": "exact_match",
+    "func": "match_in_list",
     "result": {
       "type": "default_search_engine"
     },
     "expected": {
       "type": "rule",
       "rules": {
-        "expected": "Bing"
+        "expected": ["Microsoft Bing", "Bing"]
       }
     }
   }