Files
sci-gui-agent-benchmark/desktop_env/evaluators/metrics/vlc.py

201 lines
6.8 KiB
Python

import logging
import os
import subprocess
from typing import Dict
from xml.etree import ElementTree
import acoustid
import cv2
import imagehash
from skimage.metrics import structural_similarity as ssim
import librosa
from PIL import Image
import numpy as np
logger = logging.getLogger("desktopenv.metrics.vlc")
def is_vlc_playing(actual_status_path: str, rule: Dict[str, str]) -> float:
"""
Checks if VLC is currently playing a file.
"""
with open(actual_status_path, 'rb') as file:
actual_status = file.read().decode('utf-8')
tree = ElementTree.fromstring(actual_status)
status = tree.find('state').text
if status == 'playing':
if rule['type'] == 'file_name':
file_info = tree.find('information/category[@name="meta"]/info[@name="filename"]').text
if file_info:
return 1 if file_info.endswith(rule['file_name']) else 0
elif rule['type'] == 'url':
file_info = tree.find('information/category[@name="meta"]/info[@name="url"]').text
if file_info:
return 1 if file_info.endswith(rule['url']) else 0
else:
logger.error(f"Unknown type: {rule['type']}")
return 0
else:
return 0
def is_vlc_recordings_folder(actual_config_path: str, rule: Dict[str, str]) -> float:
"""
Checks if VLC's recording folder is set to the expected value.
"""
with open(actual_config_path, 'rb') as file:
config_file = file.read().decode('utf-8')
expected_recording_file_path = rule['recording_file_path']
try:
for line in config_file:
# Skip comments and empty lines
if line.startswith('#') or not line.strip():
continue
# Check if the line contains the recording path setting
if 'recorded_files_path' in line:
# Extract the value of the recording path and remove surrounding whitespace
current_path = line.split('=')[-1].strip()
# Compare with the Desktop path
if current_path == expected_recording_file_path:
return 1
else:
return 0
# The configuration key was not found in the file
return 0
except FileNotFoundError:
logger.error("VLC configuration file not found.")
return 0
except Exception as e:
logger.error(f"An error occurred: {e}")
return 0
def is_vlc_fullscreen(actual_window_size, screen_size):
if actual_window_size['width'] == screen_size['width'] and actual_window_size['height'] == screen_size['height']:
return 1
else:
return 0
def compare_images(image1_path, image2_path):
# You would call this function with the paths to the two images you want to compare:
# score = compare_images('path_to_image1', 'path_to_image2')
# print("Similarity score:", score)
# Open the images and convert to grayscale
image1 = Image.open(image1_path).convert('L')
image2 = Image.open(image2_path).convert('L')
# Resize images to the smaller one's size for comparison
image1_size = image1.size
image2_size = image2.size
new_size = min(image1_size, image2_size)
image1 = image1.resize(new_size, Image.Resampling.LANCZOS)
image2 = image2.resize(new_size, Image.Resampling.LANCZOS)
# Convert images to numpy arrays
image1_array = np.array(image1)
image2_array = np.array(image2)
# Calculate SSIM between two images
similarity_index = ssim(image1_array, image2_array)
return similarity_index
def compare_audios(audio_path_1, audio_path_2, max_distance=1000):
"""
Compare two audio files and return a similarity score in the range [0, 1].
audio_path_1, audio_path_2: paths to the audio files to compare
max_distance: an empirically determined maximum expected DTW distance
"""
# Example Usage:
# similarity = compare_audios_simple('path_to_audio1.mp3', 'path_to_audio2.mp3')
# print(f'Similarity Score: {similarity}')
# Convert to common format if necessary and load audio
y1, sr1 = librosa.load(audio_path_1)
y2, sr2 = librosa.load(audio_path_2)
# Extract MFCC features
mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
# Compute Dynamic Time Warping distance
distance, path = librosa.sequence.dtw(mfcc1.T, mfcc2.T)
# Normalize distance to get a similarity score
normalized_distance = np.mean(distance) / max_distance
similarity_score = 1 - min(normalized_distance, 1) # Ensure the score is within [0, 1]
return similarity_score
def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5):
# Open both video files
cap1 = cv2.VideoCapture(video_path1)
cap2 = cv2.VideoCapture(video_path2)
frames_checked = 0
mismatch_count = 0
while frames_checked < max_frames_to_check:
# Read frames from both videos
ret1, frame1 = cap1.read()
ret2, frame2 = cap2.read()
# If a video ends, then check if both ended to confirm they are of the same length
if not ret1 or not ret2:
return ret1 == ret2
# Convert frames to PIL Images
frame1 = Image.fromarray(cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB))
frame2 = Image.fromarray(cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB))
# Compute the perceptual hash for each frame
hash1 = imagehash.phash(frame1)
hash2 = imagehash.phash(frame2)
# Increment the frames checked
frames_checked += 1
# Compute the difference in the hashes
if hash1 - hash2 > threshold:
mismatch_count += 1
# If there's a significant difference, the frames are not the same
if mismatch_count > threshold:
return False
# If we reach here, the content appears to be the same
return True
def are_audio_files_similar(mp3_file_path, mp4_file_path):
# Extract audio fingerprint from MP3 file
mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path)
# Extract the audio stream from the MP4 file
mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3'
try:
subprocess.run(["ffmpeg", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3",
mp4_audio_path], check=True)
except subprocess.CalledProcessError as e:
print(f"An error occurred during audio extraction from MP4: {e}")
return False
# Extract audio fingerprint from the extracted audio
mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path)
# Clean up temporary extracted audio file
os.remove(mp4_audio_path)
# Compare fingerprints (rudimentary comparison)
if mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint:
return True
return False