Update Chrome examples; refactor the control logic

2024-01-12 17:24:47 +08:00
parent 820579a5a2
commit 5a93a32958
17 changed files with 575 additions and 194 deletions
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -1,3 +1,4 @@
+from .chrome import is_expected_tabs, is_expected_bookmarks
 from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
 from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
    compare_insert_equation
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -1,52 +1,39 @@
 import logging
-
-from playwright.sync_api import sync_playwright
+from typing import Any, Dict, List
+from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls

 logger = logging.getLogger("desktopenv.metrics.chrome")


+def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
+    """
+    Check whether the tabs open in Chrome match the expected ones.
+
+    Args:
+        open_tabs: one dict per open tab; the 'url' entry of each is read.
+        rule: matching rule. rule['type'] selects the strategy; only "url"
+            is supported, in which case rule['urls'] lists the expected URLs.
+
+    Returns:
+        1 if the URLs of the open tabs match the expected URLs (judged by
+        are_lists_equal with compare_urls as the comparison), otherwise 0.
+        An unknown rule type is logged as an error and scores 0.
+    """
+
+    # Log at debug level instead of print() so evaluation runs do not spam stdout.
+    logger.debug("open_tabs: %s, rule: %s", open_tabs, rule)
+    match_type = rule['type']
+
+    if match_type == "url":
+        expected_urls = rule['urls']
+        actual_urls = [tab['url'] for tab in open_tabs]
+        return 1 if are_lists_equal(expected_urls, actual_urls, compare_urls) else 0
+    else:
+        logger.error(f"Unknown type: {match_type}")
+        return 0


-# todo: move to getter module
+def is_expected_bookmarks(bookmarks: List[Dict[str, Any]], rule: Dict[str, Any]) -> float:
+    """
+    Check whether the bookmarks in Chrome match the expected ones.
+
+    Args:
+        bookmarks: one dict per bookmark; the 'url' entry of each is read.
+        rule: matching rule. rule['type'] selects the strategy; only "url"
+            is handled, in which case rule['urls'] lists the expected URLs.
+
+    Returns:
+        1 if the bookmark URLs match the expected URLs (judged by
+        are_lists_equal with compare_urls as the comparison), otherwise 0.
+        An unknown rule type is logged as an error and scores 0.
+    """

-# The following ones just need to load info from the files of software, no need to connect to the software
+    # todo (left unspecified by the author; only the "url" rule type is handled below)
+    match_type = rule['type']

-
-# The following ones require Playwright to be installed on the target machine, and the chrome needs to be pre-config on port info to allow remote debugging, see README.md for details
-
-def get_open_tabs_info(remote_debugging_url):
-    with sync_playwright() as p:
-        # connect to remote Chrome instance
-        browser = p.chromium.connect_over_cdp(remote_debugging_url)
-
-        tabs_info = []
-        for context in browser.contexts:
-            for page in context.pages:
-                title = page.title()
-                url = page.url
-                tabs_info.append({'title': title, 'url': url})
-
-        browser.close()
-        return tabs_info
-
-
-def get_active_tab_info(remote_debugging_url):
-    with sync_playwright() as p:
-        # connect to remote Chrome instance
-        browser = p.chromium.connect_over_cdp(remote_debugging_url)
-
-        active_tab_info = {}
-        for context in browser.contexts:
-            for page in context.pages():
-                if page.is_visible("body"):  # check the visibility of the page body to determine the active status
-                    active_tab_info = {
-                        'title': page.title(),
-                        'url': page.url,
-                        'content': page.content()  # get the HTML content of the page
-                    }
-                    break
-            if active_tab_info:
-                break
-
-        browser.close()
-        return active_tab_info
+    if match_type == "url":
+        expected_urls = rule['urls']
+        actual_urls = [bookmark['url'] for bookmark in bookmarks]
+        return 1 if are_lists_equal(expected_urls, actual_urls, compare_urls) else 0
+    else:
+        logger.error(f"Unknown type: {match_type}")
+        return 0
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -1,26 +1,29 @@
-import zipfile
-import lxml.etree
-import lxml.cssselect
-from lxml.etree import _Element
-import xmltodict
-import openpyxl
-from openpyxl import Workbook
-from openpyxl.worksheet.worksheet import Worksheet
-from openpyxl.chart._chart import ChartBase
-
-from typing import Dict, List, Set
-from typing import Any
-
 import logging
+import zipfile
+from typing import Any
+from typing import Dict, List, Set
+from urllib.parse import urlparse, urlunparse
+
+import lxml.cssselect
+import lxml.etree
+import openpyxl
+import xmltodict
+from lxml.etree import _Element
+from openpyxl import Workbook
+from openpyxl.chart._chart import ChartBase
+from openpyxl.worksheet.worksheet import Worksheet
+
 logger = logging.getLogger("desktopenv.metrics.utils")

-_xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
-                   , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
-                   ]
+_xlsx_namespaces = [("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
+    , ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
+                    ]
 _xlsx_ns_mapping = dict(_xlsx_namespaces)
 _xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
 _sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
-#print(_sparklines_selector.css)
+
+
+# print(_sparklines_selector.css)
 def load_sparklines(xlsx_file: str) -> Dict[str, str]:
    """
    This function modifies data_frame in-place
@@ -44,13 +47,14 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
    sparklines_dict: Dict[str, str] = {}
    for sp_l in sparklines:
        sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
-        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml
-                                                              , process_namespaces=True
-                                                              , namespaces=_xlsx_ns_imapping
-                                                              )
+        sparkline: Dict[str, Dict[str, str]] = xmltodict.parse(sparkline_xml
+                                                               , process_namespaces=True
+                                                               , namespaces=_xlsx_ns_imapping
+                                                               )
        sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
    return sparklines_dict

+
 # Available Chart Properties:
 # title: str
 # anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
@@ -70,7 +74,7 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
        Dict[str, Any]: information of charts
    """

-    #workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
+    # workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
    worksheet: Worksheet = xlsx_file.active
    charts: List[ChartBase] = worksheet._charts

@@ -79,22 +83,22 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
    for ch in charts:
        series: List[str] = []
        for ser in ch.series:
-            value_num = ser.val.numRef.f\
-                     if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f")\
-                   else ""
-            value_str = ser.val.strRef.f\
-                     if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f")\
-                   else ""
-            categ_num = ser.cat.numRef.f\
-                     if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f")\
-                   else ""
-            categ_str = ser.cat.strRef.f\
-                     if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f")\
-                   else ""
-            series.append( "{:},{:},{:},{:}".format( value_num, value_str
+            value_num = ser.val.numRef.f \
+                if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f") \
+                else ""
+            value_str = ser.val.strRef.f \
+                if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f") \
+                else ""
+            categ_num = ser.cat.numRef.f \
+                if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f") \
+                else ""
+            categ_str = ser.cat.strRef.f \
+                if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f") \
+                else ""
+            series.append("{:},{:},{:},{:}".format(value_num, value_str
                                                   , categ_num, categ_str
                                                   )
-                         )
+                          )
        series: str = ";".join(series)

        # TODO: maybe more aspects, like chart type
@@ -103,10 +107,10 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
        if "title" in chart_props:
            info["title"] = ch.title.tx.rich.p[0].r[0].t
        if "anchor" in chart_props:
-            info["anchor"] = [ ch.anchor.editAs
-                             , ch.anchor._from.col, ch.anchor.to.row
-                             , ch.anchor.to.col, ch.anchor.to.row
-                             ]
+            info["anchor"] = [ch.anchor.editAs
+                , ch.anchor._from.col, ch.anchor.to.row
+                , ch.anchor.to.col, ch.anchor.to.row
+                              ]
        if "width" in chart_props:
            info["width"] = ch.width
        if "height" in chart_props:
@@ -125,40 +129,83 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
        chart_set[series] = info
    return chart_set

+
+def are_lists_equal(list1, list2, comparison_func):
+    """
+    Check whether two lists hold the same elements, ignoring order.
+
+    Elements are paired one-to-one: every item of list1 must be matched with
+    a distinct item of list2, so duplicates are counted (e.g. [a, a, b] is
+    not equal to [a, b, b]).
+
+    Args:
+        list1: first list.
+        list2: second list.
+        comparison_func: callable (item1, item2) -> bool deciding whether an
+            item of list1 equals an item of list2. The greedy pairing used
+            here is exact when the callable behaves like an equivalence
+            relation.
+
+    Returns:
+        True if the lists have the same length and all items can be paired,
+        False otherwise.
+    """
+    # Lists of different length can never be paired one-to-one
+    if len(list1) != len(list2):
+        return False
+
+    # Items of list2 that have not been paired yet; each match consumes one,
+    # so a repeated item in list1 needs an equally repeated item in list2.
+    unmatched = list(list2)
+    for item1 in list1:
+        for index, item2 in enumerate(unmatched):
+            if comparison_func(item1, item2):
+                del unmatched[index]
+                break
+        else:
+            # No remaining item of list2 equals item1
+            return False
+
+    # Every item found a distinct partner, so the lists are equal
+    return True
+
+
+def compare_urls(url1, url2):
+    """
+    Compare two URLs after normalizing them.
+
+    Normalization: a missing scheme is treated as 'http', scheme and network
+    location are lowercased, a leading 'www.' is dropped from the network
+    location, and a path consisting only of '/' is treated as empty. Params,
+    query and fragment are compared as-is.
+
+    Args:
+        url1: first URL string.
+        url2: second URL string.
+
+    Returns:
+        True if both URLs normalize to the same string, False otherwise.
+    """
+
+    def normalize_url(url):
+        # Parse the URL
+        parsed_url = urlparse(url)
+
+        # If no scheme is present, assume 'http'. Re-parse with the scheme
+        # prepended: urlparse("example.com/x") would otherwise put the host
+        # into the path and leave the network location empty.
+        if not parsed_url.scheme:
+            parsed_url = urlparse("http://" + url.lstrip("/"))
+
+        # Lowercase the netloc and drop a leading 'www.' only; a plain
+        # replace() would also mangle hosts such as 'awww.example.com'.
+        normalized_netloc = parsed_url.netloc.lower()
+        if normalized_netloc.startswith("www."):
+            normalized_netloc = normalized_netloc[len("www."):]
+
+        # A bare trailing slash is equivalent to no path at all
+        normalized_path = parsed_url.path if parsed_url.path != '/' else ''
+
+        # Reassemble the URL with normalized components
+        normalized_parsed_url = parsed_url._replace(scheme=parsed_url.scheme.lower(),
+                                                    netloc=normalized_netloc,
+                                                    path=normalized_path)
+        return urlunparse(normalized_parsed_url)
+
+    # Compare the normalized forms of both URLs
+    return normalize_url(url1) == normalize_url(url2)
+
+
 if __name__ == "__main__":
    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
    workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    worksheet1: Worksheet = workbook1.active
    charts: List[ChartBase] = worksheet1._charts
-    #print(len(charts))
-    #print(type(charts[0]))
-#
-    #print(len(charts[0].series))
-    #print(type(charts[0].series[0]))
-    #print(type(charts[0].series[0].val))
+    # print(len(charts))
+    # print(type(charts[0]))
+    #
+    # print(len(charts[0].series))
+    # print(type(charts[0].series[0]))
+    # print(type(charts[0].series[0].val))
    ##print(charts[0].series[0].val)
-    #print(charts[0].series[0].val.numRef.f)
-#
-    #print(type(charts[0].series[0].cat))
+    # print(charts[0].series[0].val.numRef.f)
+    #
+    # print(type(charts[0].series[0].cat))
    ##print(charts[0].series[0].cat)
-    #print(charts[0].series[0].cat.numRef)
-    #print(charts[0].series[0].cat.strRef)
-    #print(charts[0].series[0].cat.strRef.f)
+    # print(charts[0].series[0].cat.numRef)
+    # print(charts[0].series[0].cat.strRef)
+    # print(charts[0].series[0].cat.strRef.f)

-    #print(type(charts[0].title.tx.strRef))
-    #print(type(charts[0].title.tx.rich))
-    #print(type(charts[0].title.txPr))
-    #print(len(charts[0].title.tx.rich.p))
-    #print(len(charts[0].title.tx.rich.p[0].r))
-    #print(type(charts[0].title.tx.rich.p[0].r[0]))
-    #print(type(charts[0].title.tx.rich.p[0].r[0].t))
-    #print(charts[0].title.tx.rich.p[0].r[0].t)
+    # print(type(charts[0].title.tx.strRef))
+    # print(type(charts[0].title.tx.rich))
+    # print(type(charts[0].title.txPr))
+    # print(len(charts[0].title.tx.rich.p))
+    # print(len(charts[0].title.tx.rich.p[0].r))
+    # print(type(charts[0].title.tx.rich.p[0].r[0]))
+    # print(type(charts[0].title.tx.rich.p[0].r[0].t))
+    # print(charts[0].title.tx.rich.p[0].r[0].t)

-    #print(type(charts[0].anchor))
-    #print(charts[0].anchor.editAs)
-    #print(charts[0].anchor._from.col, charts[0].anchor.to.row)
-    #print(charts[0].anchor.to.col, charts[0].anchor.to.row)
+    # print(type(charts[0].anchor))
+    # print(charts[0].anchor.editAs)
+    # print(charts[0].anchor._from.col, charts[0].anchor.to.row)
+    # print(charts[0].anchor.to.col, charts[0].anchor.to.row)

-    #df1 = pd.read_excel(path1)
-    #print(df1)
+    # df1 = pd.read_excel(path1)
+    # print(df1)
    print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))