sci-gui-agent-benchmark/desktop_env/evaluators/metrics/vlc.py

import logging
import os
import subprocess
from typing import Dict
from xml.etree import ElementTree

import acoustid
import cv2
import imagehash
import pyautogui
import pygetwindow as gw  # todo: change to the library that supports Linux
from PIL import Image

logger = logging.getLogger("desktopenv.metrics.vlc")


def is_vlc_playing(actual_status_path: str, rule: Dict[str, str]) -> float:
    """
    Checks if VLC is currently playing the expected file or URL.

    The status document is parsed as XML. Its root is expected to contain a
    ``state`` element and, under ``information/category[@name="meta"]``,
    ``info`` elements named ``filename`` and/or ``url``.

    Args:
        actual_status_path: Path to the saved VLC status XML file (UTF-8).
        rule: Either ``{"type": "file_name", "file_name": <suffix>}`` or
            ``{"type": "url", "url": <suffix>}``.

    Returns:
        1 if the state is ``playing`` and the reported file name / URL ends
        with the expected value; 0 in every other case (not playing, unknown
        rule type, or the metadata entry is missing or empty).

    Raises:
        KeyError: If ``rule`` lacks ``type`` or the key matching its type.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    with open(actual_status_path, 'rb') as file:
        actual_status = file.read().decode('utf-8')

    tree = ElementTree.fromstring(actual_status)

    # findtext() returns None for a missing element, so a status document
    # without <state> counts as "not playing" instead of raising.
    if tree.findtext('state') != 'playing':
        return 0

    rule_type = rule['type']
    if rule_type not in ('file_name', 'url'):
        logger.error(f"Unknown type: {rule['type']}")
        return 0

    # The rule key is 'file_name' but VLC's metadata entry is named 'filename'.
    meta_name = 'filename' if rule_type == 'file_name' else 'url'
    file_info = tree.findtext(
        f'information/category[@name="meta"]/info[@name="{meta_name}"]'
    )

    # A missing or empty metadata entry cannot match anything; report an
    # explicit 0 rather than falling off the end of the function.
    if not file_info:
        return 0

    return 1 if file_info.endswith(rule[rule_type]) else 0


def is_vlc_recordings_folder(actual_config_path: str, rule: Dict[str, str]) -> float:
    """
    Checks if VLC's recording folder is set to the expected value.

    The configuration file is scanned line by line. Comment lines (starting
    with ``#``) and blank lines are ignored. The first remaining line that
    mentions ``recorded_files_path`` decides the result: everything after the
    first ``=`` (whitespace-stripped) is compared with the expected path.

    Args:
        actual_config_path: Path to the VLC configuration file (UTF-8).
        rule: Mapping with key ``recording_file_path`` holding the expected
            folder.

    Returns:
        True if the configured path equals the expected one; False if it
        differs, the key is absent, or the file cannot be read.

    Raises:
        KeyError: If ``rule`` has no ``recording_file_path`` entry.
    """
    # Reading happens inside the try block so that a missing or unreadable
    # file is reported as a failed check instead of escaping as an exception.
    try:
        with open(actual_config_path, 'rb') as file:
            config_file = file.read().decode('utf-8')
    except FileNotFoundError:
        logger.error("VLC configuration file not found.")
        return False
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return False

    expected_recording_file_path = rule['recording_file_path']

    # NOTE(review): stock VLC appears to store this setting as
    # `input-record-path` in vlcrc -- confirm that 'recorded_files_path' is the
    # key present in the config files this check is run against.
    # Iterate over lines; iterating the string directly would yield characters.
    for line in config_file.splitlines():
        # Skip comments and empty lines
        if line.startswith('#') or not line.strip():
            continue
        if 'recorded_files_path' in line:
            # Keep everything after the first '=' so paths containing '='
            # are not truncated.
            current_path = line.partition('=')[2].strip()
            return current_path == expected_recording_file_path

    # The configuration key was not found anywhere in the file
    return False


def are_audio_files_similar(mp3_file_path, mp4_file_path):
    """
    Checks whether an MP3 file carries the same audio as an MP4 file's audio track.

    The MP4's audio stream is transcoded to a temporary MP3 with ffmpeg, both
    files are fingerprinted with ``acoustid.fingerprint_file`` and the
    fingerprints are compared for exact equality (a rudimentary comparison).

    Args:
        mp3_file_path: Path to the MP3 file.
        mp4_file_path: Path to the MP4 file whose audio track is extracted.

    Returns:
        True if the MP3 is at least as long as the extracted audio and both
        fingerprints are identical; False otherwise, including when ffmpeg
        exits with a non-zero status.

    Raises:
        Whatever ``acoustid.fingerprint_file`` raises on fingerprinting
        failure, and ``FileNotFoundError`` if the ``ffmpeg`` executable is
        not available; neither is handled here.
    """
    # Local import: tempfile is not among this module's top-level imports.
    import tempfile

    # Extract audio fingerprint from MP3 file
    mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path)

    # Work inside a private temporary directory: the extracted file can never
    # collide with an existing file, and it is removed on every exit path
    # (ffmpeg failure and fingerprinting errors included).
    with tempfile.TemporaryDirectory() as tmp_dir:
        mp4_audio_path = os.path.join(tmp_dir, 'extracted.mp3')
        try:
            # "-y" prevents ffmpeg from ever waiting on an overwrite prompt.
            subprocess.run(["ffmpeg", "-y", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k",
                            "-f", "mp3", mp4_audio_path], check=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"An error occurred during audio extraction from MP4: {e}")
            return False

        # Extract audio fingerprint from the extracted audio
        mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path)

    # Compare fingerprints (rudimentary comparison)
    return mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint


def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5):
    """
    Checks whether two videos show the same content, frame by frame.

    Frames are read from both videos in lockstep, and each pair is compared
    via perceptual hashes (``imagehash.phash``).

    Args:
        video_path1: Path to the first video.
        video_path2: Path to the second video.
        max_frames_to_check: Maximum number of frame pairs to compare.
        threshold: Used for two limits: a frame pair counts as a mismatch when
            its hash difference exceeds ``threshold``, and the videos are
            declared different once more than ``threshold`` pairs mismatch.

    Returns:
        False if either video cannot be opened, if too many frame pairs
        mismatch, or if one video ends before the other within the checked
        range; True otherwise.
    """
    # Open both video files
    cap1 = cv2.VideoCapture(video_path1)
    cap2 = cv2.VideoCapture(video_path2)

    try:
        # Without this guard two unreadable files would both report "ended"
        # on the first read and be judged identical.
        if not (cap1.isOpened() and cap2.isOpened()):
            logger.error("Could not open one or both videos for comparison.")
            return False

        frames_checked = 0
        mismatch_count = 0

        while frames_checked < max_frames_to_check:
            # Read frames from both videos
            ret1, frame1 = cap1.read()
            ret2, frame2 = cap2.read()

            # If a video ends, then check if both ended to confirm they are of the same length
            if not ret1 or not ret2:
                return ret1 == ret2

            # Convert frames to PIL Images (OpenCV delivers BGR, PIL expects RGB)
            frame1 = Image.fromarray(cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB))
            frame2 = Image.fromarray(cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB))

            # Compute the perceptual hash for each frame
            hash1 = imagehash.phash(frame1)
            hash2 = imagehash.phash(frame2)

            # Increment the frames checked
            frames_checked += 1

            # Compute the difference in the hashes
            if hash1 - hash2 > threshold:
                mismatch_count += 1
                # Too many differing frame pairs: the videos are not the same
                if mismatch_count > threshold:
                    return False

        # If we reach here, the content appears to be the same
        return True
    finally:
        # Always free the capture handles, whichever return path was taken.
        cap1.release()
        cap2.release()


def is_vlc_fullscreen():
    """
    Reports whether the VLC window currently fills the whole screen.

    The first window whose title matches 'VLC media player' is looked up and
    its width and height are compared with the screen size reported by
    pyautogui; equal dimensions are taken to mean full-screen mode.

    Returns:
        True when both dimensions match the screen size; False when they do
        not, when no such window exists, or when any error occurs during the
        lookup.
    """
    try:
        # Indexing an empty result raises IndexError, handled below.
        matching_windows = gw.getWindowsWithTitle('VLC media player')
        window = matching_windows[0]
        if not window:
            return False

        # Screen dimensions to compare the window against
        screen_w, screen_h = pyautogui.size()

        if window.width != screen_w:
            return False
        return window.height == screen_h

    except IndexError:
        # No window with the expected title
        logger.error("VLC window not found.")
        return False
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return False