Finish Impress v2 loading; some pre-processing still remains
This commit is contained in:
@@ -10,6 +10,8 @@ import imagehash
|
||||
import librosa
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
from fastdtw import fastdtw
|
||||
from scipy.spatial.distance import cosine
|
||||
from skimage.metrics import structural_similarity as ssim
|
||||
|
||||
logger = logging.getLogger("desktopenv.metrics.vlc")
|
||||
@@ -111,11 +113,10 @@ def compare_images(image1_path, image2_path):
|
||||
return similarity_index
|
||||
|
||||
|
||||
def compare_audios(audio_path_1, audio_path_2, max_distance=1000):
|
||||
def compare_audios(audio_path_1, audio_path_2):
|
||||
"""
|
||||
Compare two audio files and return a similarity score in the range [0, 1].
|
||||
audio_path_1, audio_path_2: paths to the audio files to compare
|
||||
max_distance: an empirically determined maximum expected DTW distance
|
||||
"""
|
||||
# Example Usage:
|
||||
# similarity = compare_audios('path_to_audio1.mp3', 'path_to_audio2.mp3')
|
||||
@@ -125,21 +126,31 @@ def compare_audios(audio_path_1, audio_path_2, max_distance=1000):
|
||||
if not audio_path_1 or not audio_path_2:
|
||||
return 0
|
||||
|
||||
# Load the audio files and extract MFCC features
|
||||
y1, sr1 = librosa.load(audio_path_1)
|
||||
y2, sr2 = librosa.load(audio_path_2)
|
||||
|
||||
# Extract MFCC features
|
||||
mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
|
||||
|
||||
y2, sr2 = librosa.load(audio_path_2)
|
||||
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
|
||||
|
||||
# Compute Dynamic Time Warping distance
|
||||
distance, path = librosa.sequence.dtw(mfcc1.T, mfcc2.T)
|
||||
# Normalize the MFCC features
|
||||
mfcc1 = librosa.util.normalize(mfcc1, axis=1)
|
||||
mfcc2 = librosa.util.normalize(mfcc2, axis=1)
|
||||
|
||||
# Normalize distance to get a similarity score
|
||||
normalized_distance = np.mean(distance) / max_distance
|
||||
similarity_score = 1 - min(normalized_distance, 1) # Ensure the score is within [0, 1]
|
||||
# Define a lambda function to compute cosine distance
|
||||
dist_func = lambda x, y: cosine(x, y)
|
||||
|
||||
return similarity_score
|
||||
# Use the DTW algorithm to find the best alignment path
|
||||
distance, path = fastdtw(mfcc1.T, mfcc2.T, dist=dist_func)
|
||||
|
||||
# Convert the DTW distance into a similarity score using 1 / (1 + distance)
|
||||
similarity = 1 / (1 + distance)
|
||||
|
||||
return similarity
|
||||
|
||||
|
||||
def compare_audios_by_dl_model(audio_path_1, audio_path_2):
|
||||
pass
|
||||
|
||||
|
||||
def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5):
|
||||
|
||||
Reference in New Issue
Block a user