Merge branch 'main' into zdy

2024-01-11 23:02:00 +08:00
parent 3c04872dcf b30d87bba8
commit 127a101994
39 changed files with 580 additions and 282 deletions
--- a/desktop_env/evaluators/metrics/README.md
+++ b/desktop_env/evaluators/metrics/README.md
@@ -1,144 +0,0 @@
-# Setup Instructions
-
-## LibreOffice Writer
-
-### Setting Up the python-docx Library
-```shell
-pip install python-docx
-```
-
-## Chrome
-
-### Starting Chrome with Remote Debugging for Python
-
-To enable remote debugging in Chrome, which allows tools like Playwright for Python to connect to and control an existing Chrome instance, follow these steps:
-
-#### Manually Enabling Remote Debugging in Chrome
-
-1. **Locate the Chrome Shortcut**:
-   - Find the Chrome shortcut that you usually use to open the browser. This could be on your desktop, start menu, or taskbar.
-
-2. **Edit Shortcut Properties**:
-   - Right-click on the Chrome shortcut and select `Properties`.
-
-3. **Modify the Target Field**:
-   - In the `Target` field, add `--remote-debugging-port=9222` at the end of the path. Ensure there is a space between the path and the flag you add.
-   - It should look something like this: `"C:\Path\To\Chrome.exe" --remote-debugging-port=9222`.
-
-4. **Apply and Close**:
-   - Click `Apply` and then `OK` to close the dialog.
-
-5. **Start Chrome**:
-   - Use this modified shortcut to start Chrome. Chrome will now start with remote debugging enabled on port 9222.
-
-6. **Confirm Remote Debugging**:
-   - Open a browser and navigate to `http://localhost:9222`. If you see a webpage with information about active tabs, remote debugging is working.
-
---
-
-### Setting Up Playwright for Python
-
-Playwright for Python is a browser automation library to control Chromium, Firefox, and WebKit with a single API.
-
-#### Installing Playwright
-
- Ensure you have Python installed on your system. If not, download and install it from the [Python official website](https://www.python.org/).
-
- Install Playwright using pip (Python Package Installer). Open a command line or terminal and run:
-
-  ```bash
-  pip install playwright
-  ```
-
- After installing Playwright, you need to run the install command to download the necessary browser binaries:
-
-  ```bash
-  playwright install
-  ```
-
-#### Writing a Playwright Script in Python
-
- Create a Python file for your automation script.
-
- Import the Playwright module at the beginning of your script:
-
-  ```python
-  from playwright.sync_api import sync_playwright
-  ```
-
- You can now use Playwright's API to control browsers.
-
-#### Example Playwright Script
-
-Here is a simple example to open a page using Playwright:
-
-```python
-from playwright.sync_api import sync_playwright
-
-def run(playwright):
-    browser = playwright.chromium.launch()
-    page = browser.new_page()
-    page.goto("http://example.com")
-    ## other actions...
-    browser.close()
-
-with sync_playwright() as playwright:
-    run(playwright)
-```
-
- This script launches Chromium, opens a new page, navigates to `example.com`, and then closes the browser.
-
-#### Troubleshooting
-
- If you encounter issues with Playwright, ensure that your Python environment is correctly set up and that you have installed Playwright and its dependencies correctly.
- For detailed documentation, visit the [Playwright for Python Documentation](https://playwright.dev/python/docs/intro).
-
-
-## VLC Media Player
-### Setting Up VLC's HTTP Interface
-
-To enable and use the HTTP interface in VLC Media Player for remote control and status checks, follow these steps:
-
-#### 1. Open VLC Preferences
-
- Open VLC Media Player.
- Go to `Tools` > `Preferences` from the menu.
-
-#### 2. Show All Settings
-
- In the Preferences window, at the bottom left corner, select `All` under `Show settings` to display advanced settings.
-
-#### 3. Enable Main Interfaces
-
- In the advanced preferences, expand the `Interface` section.
- Click on `Main interfaces`.
- Check the box for `Web` to enable the HTTP interface.
-
-#### 4. Configure Lua HTTP
-
- Expand the `Main interfaces` node and select `Lua`.
- Under `Lua HTTP`, set a password in the `Lua HTTP` section. This password will be required to access the HTTP interface.
-
-#### 5. Save and Restart VLC
-
- Click `Save` to apply the changes.
- Restart VLC Media Player for the changes to take effect.
-
-#### 6. Accessing the HTTP Interface
-
- Open a web browser and go to `http://localhost:8080`.
- You will be prompted for a password. Enter the password you set in the Lua HTTP settings.
- Once logged in, you will have access to VLC's HTTP interface for remote control.
-
-#### Packages
-```bash
-
-pip install opencv-python-headless Pillow imagehash
-```
-
-#### Troubleshooting
-
- If you cannot access the HTTP interface, check if your firewall or security software is blocking the connection.
- Ensure VLC is running and the correct port (default is 8080) is being used.
- If the port is in use by another application, you may change the port number in VLC's settings.
-
--- a/desktop_env/evaluators/metrics/init.py
+++ b/desktop_env/evaluators/metrics/init.py
@@ -1,10 +1,13 @@
-from .table import compare_table
-from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom
-from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, compare_insert_equation
 from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
+from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
+    compare_insert_equation
 from .docs import is_first_line_centered, check_file_exists, compare_contains_image
-from .pdf import check_pdf_pages
+from .general import exact_match, fuzzy_match, check_csv, check_accessibility_tree, check_list
 from .libreoffice import check_libre_locale
-from .vlc import is_vlc_playing, is_vlc_recordings_folder
+from .pdf import check_pdf_pages
+from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom
+from .table import compare_table
+from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \
+    compare_videos
 from .general import check_csv, check_accessibility_tree, check_list
 from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
--- a/desktop_env/evaluators/metrics/chrome.py
+++ b/desktop_env/evaluators/metrics/chrome.py
@@ -1,127 +1,15 @@
-import json
-import os
-import platform
-import sqlite3
+import logging

 from playwright.sync_api import sync_playwright

-import logging
 logger = logging.getLogger("desktopenv.metrics.chrome")

-"""
-WARNING: 
-1. Functions from this script assume that no account is registered on Chrome, otherwise the default file path needs to be changed.
-2. The functions are not tested on Windows and Mac, but they should work.
-"""
+


 # todo: move to getter module

 # The following ones just need to load info from the files of software, no need to connect to the software
-def get_default_search_engine():
-    if platform.system() == 'Windows':
-        preference_file_path = os.path.join(os.getenv('LOCALAPPDATA'),
-                                            'Google\\Chrome\\User Data\\Default\\Preferences')
-    elif platform.system() == 'Darwin':
-        preference_file_path = os.path.join(os.getenv('HOME'),
-                                            'Library/Application Support/Google/Chrome/Default/Preferences')
-    elif platform.system() == 'Linux':
-        preference_file_path = os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences')
-    else:
-        raise Exception('Unsupported operating system')
-
-    try:
-        with open(preference_file_path, 'r', encoding='utf-8') as file:
-            data = json.load(file)
-
-        # The path within the JSON data to the default search engine might vary
-        search_engine = data.get('default_search_provider_data', {}).get('template_url_data', {}).get('short_name',
-                                                                                                      'Google')
-        return search_engine
-    except Exception as e:
-        logger.error(f"Error: {e}")
-        return "Google"
-
-
-def get_cookie_data():
-    if platform.system() == 'Windows':
-        chrome_cookie_file_path = os.path.join(os.getenv('LOCALAPPDATA'), 'Google\\Chrome\\User Data\\Default\\Cookies')
-    elif platform.system() == 'Darwin':
-        chrome_cookie_file_path = os.path.join(os.getenv('HOME'),
-                                               'Library/Application Support/Google/Chrome/Default/Cookies')
-    elif platform.system() == 'Linux':
-        chrome_cookie_file_path = os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Cookies')
-    else:
-        raise Exception('Unsupported operating system')
-
-    try:
-        conn = sqlite3.connect(chrome_cookie_file_path)
-        cursor = conn.cursor()
-
-        # Query to check for OpenAI cookies
-        cursor.execute("SELECT * FROM cookies")
-        cookies = cursor.fetchall()
-
-        return cookies
-    except Exception as e:
-        logger.error(f"Error: {e}")
-        return None
-
-
-def get_bookmarks():
-    if platform.system() == 'Windows':
-        preference_file_path = os.path.join(os.getenv('LOCALAPPDATA'),
-                                            'Google\\Chrome\\User Data\\Default\\Bookmarks')
-    elif platform.system() == 'Darwin':
-        preference_file_path = os.path.join(os.getenv('HOME'),
-                                            'Library/Application Support/Google/Chrome/Default/Bookmarks')
-    elif platform.system() == 'Linux':
-        preference_file_path = os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Bookmarks')
-    else:
-        raise Exception('Unsupported operating system')
-
-    try:
-        with open(preference_file_path, 'r', encoding='utf-8') as file:
-            data = json.load(file)
-
-        bookmarks = data.get('roots', {})
-        return bookmarks
-
-    except Exception as e:
-        logger.error(f"Error: {e}")
-        return None
-
-
-def get_extensions_installed_from_shop():
-    """Find the Chrome extensions directory based on the operating system."""
-    os_name = platform.system()
-    if os_name == 'Windows':
-        chrome_extension_dir = os.path.expanduser(
-            '~') + '\\AppData\\Local\\Google\\Chrome\\User Data\\Default\\Extensions\\'
-    elif os_name == 'Darwin':  # macOS
-        chrome_extension_dir = os.path.expanduser(
-            '~') + '/Library/Application Support/Google/Chrome/Default/Extensions/'
-    elif os_name == 'Linux':
-        chrome_extension_dir = os.path.expanduser('~') + '/.config/google-chrome/Default/Extensions/'
-    else:
-        raise Exception('Unsupported operating system')
-
-    manifests = []
-    for extension_id in os.listdir(chrome_extension_dir):
-        extension_path = os.path.join(chrome_extension_dir, extension_id)
-        if os.path.isdir(extension_path):
-            # Iterate through version-named subdirectories
-            for version_dir in os.listdir(extension_path):
-                version_path = os.path.join(extension_path, version_dir)
-                manifest_path = os.path.join(version_path, 'manifest.json')
-                if os.path.isfile(manifest_path):
-                    with open(manifest_path, 'r') as file:
-                        try:
-                            manifest = json.load(file)
-                            manifests.append(manifest)
-                        except json.JSONDecodeError:
-                            logger.error(f"Error reading {manifest_path}")
-    return manifests


 # The following ones require Playwright to be installed on the target machine, and the chrome needs to be pre-config on port info to allow remote debugging, see README.md for details
--- a/desktop_env/evaluators/metrics/general.py
+++ b/desktop_env/evaluators/metrics/general.py
@@ -1,22 +1,32 @@
 import csv
+import functools
+import operator
+import re
+from numbers import Number
+from typing import Callable, Any
+from typing import Dict, List, Pattern

 import lxml.etree
-from lxml.etree import _Element
 from lxml.cssselect import CSSSelector
-
-from typing import Dict, List, Pattern
-from typing import Callable, Any
-from numbers import Number
-
-import operator
+from lxml.etree import _Element
 from rapidfuzz import fuzz
-import functools
-import re

 from .utils import _match_record

-#def _match_record(pattern: Dict[str, str], item: Dict[str, str]) -> bool:
-    #return all(k in item and item[k]==val for k, val in pattern.items())
+def exact_match(result, rules) -> float:
+    expect = rules["expected"]
+    print(result, expect)
+
+    if result == expect:
+        return 1.
+    else:
+        return 0.
+
+
+def fuzzy_match(result, rules) -> float:
+    expect = rules["expected"]
+
+    return fuzz.ratio(result, expect) / 100.

 def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
    """
@@ -46,6 +56,7 @@ def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
            unexpect_metric = unexpect_metric and not any(_match_record(r, rcd) for r in rules.get("unexpect", []))
    return float(all(expect_metrics) and unexpect_metric)

+
 def check_list(result: str, rules: Dict[str, List[str]]) -> float:
    """
    Args:
@@ -75,15 +86,18 @@ def check_list(result: str, rules: Dict[str, List[str]]) -> float:
            unexpect_metric = unexpect_metric and all(r.search(l) is None for r in unexpect_patterns)
    return float(all(expect_metrics) and unexpect_metric)

-_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
-                        , "attr": "uri:deskat:attributes.at-spi.gnome.org"
-                        , "cp": "uri:deskat:component.at-spi.gnome.org"
-                        , "doc": "uri:deskat:document.at-spi.gnome.org"
-                        , "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
-                        , "txt": "uri:deskat:text.at-spi.gnome.org"
-                        , "val": "uri:deskat:value.at-spi.gnome.org"
-                        , "act": "uri:deskat:action.at-spi.gnome.org"
-                        }
+
+_accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org"
+    , "attr": "uri:deskat:attributes.at-spi.gnome.org"
+    , "cp": "uri:deskat:component.at-spi.gnome.org"
+    , "doc": "uri:deskat:document.at-spi.gnome.org"
+    , "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
+    , "txt": "uri:deskat:text.at-spi.gnome.org"
+    , "val": "uri:deskat:value.at-spi.gnome.org"
+    , "act": "uri:deskat:action.at-spi.gnome.org"
+                         }
+
+
 def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
    """
    Args:
@@ -114,11 +128,12 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:
    else:
        raise ValueError("At least one of xpath and selectors is required")

-    if len(elements)==0:
+    if len(elements) == 0:
        return 0.

    if "text" in rules:
-        match_func: Callable[[str], Number] = functools.partial( operator.eq if rules["exact"] else fuzz.ratio
+        match_func: Callable[[str], Number] = functools.partial( operator.eq if rules["exact"]\
+                                                            else (lambda a, b: fuzz.ratio(a, b)/100.)
                                                               , rules["text"]
                                                               )
        match_score: Number = 0
@@ -129,5 +144,5 @@ def check_accessibility_tree(result: str, rules: Dict[str, Any]) -> float:

    return float(match_score)

-#def check_existence(result: str, *args) -> float:
-    #return 1. - (result is None)
+# def check_existence(result: str, *args) -> float:
+#     return 1. - (result is None)
--- a/desktop_env/evaluators/metrics/vlc.py
+++ b/desktop_env/evaluators/metrics/vlc.py
@@ -7,7 +7,10 @@ from xml.etree import ElementTree
 import acoustid
 import cv2
 import imagehash
+import librosa
+import numpy as np
 from PIL import Image
+from skimage.metrics import structural_similarity as ssim

 logger = logging.getLogger("desktopenv.metrics.vlc")

@@ -37,6 +40,7 @@ def is_vlc_playing(actual_status_path: str, rule: Dict[str, str]) -> float:
        return 0


+# fixme: part of this function can be moved to getters
 def is_vlc_recordings_folder(actual_config_path: str, rule: Dict[str, str]) -> float:
    """
    Checks if VLC's recording folder is set to the expected value.
@@ -57,24 +61,79 @@ def is_vlc_recordings_folder(actual_config_path: str, rule: Dict[str, str]) -> f
                current_path = line.split('=')[-1].strip()
                # Compare with the Desktop path
                if current_path == expected_recording_file_path:
-                    return True
+                    return 1
                else:
-                    return False
+                    return 0
            # The configuration key was not found in the file
-            return False
+            return 0
    except FileNotFoundError:
        logger.error("VLC configuration file not found.")
-        return False
+        return 0
    except Exception as e:
        logger.error(f"An error occurred: {e}")
-        return False
+        return 0


 def is_vlc_fullscreen(actual_window_size, screen_size):
    if actual_window_size['width'] == screen_size['width'] and actual_window_size['height'] == screen_size['height']:
-        return True
+        return 1
    else:
-        return False
+        return 0
+
+
+def compare_images(image1_path, image2_path):
+    # You would call this function with the paths to the two images you want to compare:
+    # score = compare_images('path_to_image1', 'path_to_image2')
+    # print("Similarity score:", score)
+
+    # Open the images and convert to grayscale
+    image1 = Image.open(image1_path).convert('L')
+    image2 = Image.open(image2_path).convert('L')
+
+    # Resize images to the smaller one's size for comparison
+    image1_size = image1.size
+    image2_size = image2.size
+    new_size = min(image1_size, image2_size)
+
+    image1 = image1.resize(new_size, Image.Resampling.LANCZOS)
+    image2 = image2.resize(new_size, Image.Resampling.LANCZOS)
+
+    # Convert images to numpy arrays
+    image1_array = np.array(image1)
+    image2_array = np.array(image2)
+
+    # Calculate SSIM between two images
+    similarity_index = ssim(image1_array, image2_array)
+
+    return similarity_index
+
+
+def compare_audios(audio_path_1, audio_path_2, max_distance=1000):
+    """
+    Compare two audio files and return a similarity score in the range [0, 1].
+    audio_path_1, audio_path_2: paths to the audio files to compare
+    max_distance: an empirically determined maximum expected DTW distance
+    """
+    # Example Usage:
+    # similarity = compare_audios('path_to_audio1.mp3', 'path_to_audio2.mp3')
+    # print(f'Similarity Score: {similarity}')
+
+    # Convert to common format if necessary and load audio
+    y1, sr1 = librosa.load(audio_path_1)
+    y2, sr2 = librosa.load(audio_path_2)
+
+    # Extract MFCC features
+    mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
+    mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
+
+    # Compute Dynamic Time Warping distance
+    distance, path = librosa.sequence.dtw(mfcc1.T, mfcc2.T)
+
+    # Normalize distance to get a similarity score
+    normalized_distance = np.mean(distance) / max_distance
+    similarity_score = 1 - min(normalized_distance, 1)  # Ensure the score is within [0, 1]
+
+    return similarity_score


 def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5):