Four commit

93f46b7a · orsier · fec73dcf · 93f46b7a · 93f46b7a
Commit 93f46b7a authored Jan 15, 2025 by orsier
Showing with 155 additions and 0 deletions
README.txt
metrics/moydata.py
--- a/README.txt
+++ b/README.txt
+metrics: Files used for calculating the metrics
+models_logs: Log files generated by training the models
+output_examples: Sample audio outputs generated by the pretrained models
+pretrain_models: The pretrained models, stored as compressed files
\ No newline at end of file
--- a/metrics/moydata.py
+++ b/metrics/moydata.py
+import librosa
+import numpy as np
+import os
+from mir_eval.separation import bss_eval_sources
+import pandas as pd
+
+def normalize_audio_common(voices, target, mixture):
+    """Scale three signals by the peak absolute amplitude they share.
+
+    The largest absolute sample value found across all three inputs is used
+    as a single divisor, so the relative levels between the signals are
+    preserved. When that peak is not positive (e.g. every input is silent)
+    the inputs are returned unchanged.
+
+    Returns:
+        The tuple ``(voices, target, mixture)`` -- in that order.
+    """
+    signals = (voices, target, mixture)
+    peak = max(np.abs(signal).max() for signal in signals)
+
+    # Guard clause: nothing to scale by, hand the inputs back untouched.
+    if not peak > 0:
+        return voices, target, mixture
+
+    return tuple(signal / peak for signal in signals)
+
+def load_and_normalize_audio(file_path):
+    """Load an audio file through ``librosa.load`` with ``sr=None``.
+
+    Despite the function's name, no normalization is applied here: the
+    samples are returned exactly as ``librosa.load`` produced them.
+
+    Returns:
+        The ``(audio, sr)`` pair returned by ``librosa.load``.
+    """
+    samples, sample_rate = librosa.load(file_path, sr=None)
+    return samples, sample_rate
+
+def calculate_mse(signal1, signal2):
+    """Return the mean squared error between two signals.
+
+    The element-wise difference ``signal1 - signal2`` is squared and then
+    averaged over every element.
+    """
+    error = signal1 - signal2
+    return np.mean(np.square(error))
+
+def calculate_snr(signal1, signal2):
+    """Return the signal-to-noise ratio of ``signal1`` against ``signal2`` in dB.
+
+    ``signal2`` acts as the reference: its summed squares are the signal
+    power, and the summed squares of ``signal1 - signal2`` are the noise
+    power. The result is ``10 * log10(signal_power / noise_power)``.
+
+    Degenerate inputs are handled explicitly instead of relying on NumPy's
+    divide-by-zero / invalid-value warnings:
+
+    * zero noise, non-zero reference  -> ``inf`` (signals are identical)
+    * zero reference, non-zero noise  -> ``-inf``
+    * zero reference and zero noise   -> ``nan`` (ratio is undefined)
+    """
+    signal_power = np.sum(signal2 ** 2)
+    noise_power = np.sum((signal1 - signal2) ** 2)
+
+    if noise_power == 0:
+        # 0/0 is undefined; x/0 for x > 0 is an infinitely good match.
+        return np.nan if signal_power == 0 else np.inf
+    if signal_power == 0:
+        # log10(0): a silent reference against a non-silent estimate.
+        return -np.inf
+
+    return 10 * np.log10(signal_power / noise_power)
+
+def calculate_sdr(signal1, signal2):
+    """Return the SDR that ``bss_eval_sources`` reports for one signal pair.
+
+    Each signal gains a leading axis of length one before the call.
+    ``signal2`` is passed as the first argument and ``signal1`` as the
+    second; only the first entry of the returned SDR array is kept.
+    """
+    reference = signal2[None, :]
+    estimate = signal1[None, :]
+    sdr_values, _sir, _sar, _perm = bss_eval_sources(reference, estimate)
+    return sdr_values[0]
+
+# Root directory; each test item lives in its own numbered sub-folder
+parent_dir = "C:\\Users\\33783\\Documents\\mles_proj\\test_guitarGDrive"
+
+# Sub-folder names "217" .. "236" (the range end, 237, is exclusive)
+folders = list(map(str, range(217, 237)))
+
+# Accumulators, one dict per processed folder:
+# metric values, and amplitude/RMS checks around normalization
+results, check_all_norm_results = [], []
+
+def _peak_and_rms(signal):
+    """Return the peak absolute amplitude and the RMS level of ``signal``."""
+    return np.max(np.abs(signal)), np.sqrt(np.mean(signal ** 2))
+
+
+# Process each folder: load its three audio files, normalize them jointly,
+# then compare the mixture and the vocals against the target.
+for folder in folders:
+    folder_path = os.path.join(parent_dir, folder)
+
+    mixture_path = os.path.join(folder_path, "mixture.wav")
+    target_path = os.path.join(folder_path, "target.wav")
+    vocals_path = os.path.join(folder_path, "vocals.wav")
+
+    # A folder that fails to load or normalize is reported and skipped so
+    # that one bad item does not abort the whole run.
+    try:
+        # The loader returns raw samples; normalization happens further down.
+        mixture, _ = load_and_normalize_audio(mixture_path)
+        target, _ = load_and_normalize_audio(target_path)
+        vocals, _ = load_and_normalize_audio(vocals_path)
+
+        # Keep the un-normalized mixture for the comparison SNR below.
+        # Normalization never modifies its inputs in place, so this reference
+        # stays raw and the file does not need to be read a second time.
+        mixture_nonormalization = mixture
+
+        # Amplitude statistics before normalization
+        max_mixture_before, rms_mixture_before = _peak_and_rms(mixture)
+        max_target_before, rms_target_before = _peak_and_rms(target)
+        max_vocals_before, rms_vocals_before = _peak_and_rms(vocals)
+
+        # Normalize all signals by the same maximum amplitude.
+        # The results must be unpacked in the order the function returns
+        # them -- (voices, target, mixture). Unpacking them as
+        # (mixture, target, vocals) swapped the mixture and vocals arrays
+        # and mislabeled every metric computed afterwards.
+        vocals, target, mixture = normalize_audio_common(vocals, target, mixture)
+
+        # Amplitude statistics after normalization
+        max_mixture_after, rms_mixture_after = _peak_and_rms(mixture)
+        max_target_after, rms_target_after = _peak_and_rms(target)
+        max_vocals_after, rms_vocals_after = _peak_and_rms(vocals)
+
+    except Exception as e:
+        print(f"Error loading or normalizing files in folder {folder}: {e}")
+        continue
+
+    # Metrics for mixture vs target (both normalized)
+    mse_mixture_target = calculate_mse(mixture, target)
+    snr_mixture_target = calculate_snr(mixture, target)
+    sdr_mixture_target = calculate_sdr(mixture, target)
+
+    # Metrics for vocals vs target (both normalized)
+    mse_vocals_target = calculate_mse(vocals, target)
+    snr_vocals_target = calculate_snr(vocals, target)
+    sdr_vocals_target = calculate_sdr(vocals, target)
+
+    # SNR of the raw mixture, for comparison.
+    # NOTE(review): the target used here is the normalized one while the
+    # mixture is not -- confirm this mixed comparison is intended.
+    snr_mixture_target_nonormalization = calculate_snr(mixture_nonormalization, target)
+
+    # Metric values for this folder
+    results.append({
+        "folder": folder,
+        "mse_mixture_target": mse_mixture_target,
+        "snr_mixture_target": snr_mixture_target,
+        "sdr_mixture_target": sdr_mixture_target,
+        "mse_vocals_target": mse_vocals_target,
+        "snr_vocals_target": snr_vocals_target,
+        "sdr_vocals_target": sdr_vocals_target,
+        "snr_mixture_target_nonormalization": snr_mixture_target_nonormalization
+    })
+
+    # Amplitude and RMS before and after normalization, for checking
+    check_all_norm_results.append({
+        "folder": folder,
+        "max_mixture_before": max_mixture_before,
+        "rms_mixture_before": rms_mixture_before,
+        "max_target_before": max_target_before,
+        "rms_target_before": rms_target_before,
+        "max_vocals_before": max_vocals_before,
+        "rms_vocals_before": rms_vocals_before,
+        "max_mixture_after": max_mixture_after,
+        "rms_mixture_after": rms_mixture_after,
+        "max_target_after": max_target_after,
+        "rms_target_after": rms_target_after,
+        "max_vocals_after": max_vocals_after,
+        "rms_vocals_after": rms_vocals_after
+    })
+
+# Save the per-folder metrics to CSV
+df_results = pd.DataFrame(results)
+df_results.to_csv("audio_comparison_results_normalized.csv", index=False)
+
+# Save the normalization checks to CSV
+df_check_all_norm = pd.DataFrame(check_all_norm_results)
+df_check_all_norm.to_csv("checkallnorm.csv", index=False)
+
+# Display both tables in the console
+print(df_results)
+print(df_check_all_norm)
+
+# Print the average SNR values. When every folder failed to load, the
+# DataFrame built from the empty list has no columns, so indexing the SNR
+# columns would raise KeyError; report that case instead of crashing.
+if df_results.empty:
+    print("No folders were processed successfully; skipping SNR averages.")
+else:
+    avg_snr_mixture_target = df_results["snr_mixture_target"].mean()
+    avg_snr_vocals_target = df_results["snr_vocals_target"].mean()
+
+    print(f"Average SNR for mixture vs target: {avg_snr_mixture_target:.2f} dB")
+    print(f"Average SNR for vocals vs target: {avg_snr_vocals_target:.2f} dB")