Audio analysis toolkit

2.4. Audio analysis toolkit

Introduction

This notebook provides an interactive tool for analyzing audio signals. It allows users to upload a pre-recorded audio file and explore its properties through various visualizations and spectral analysis techniques.

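You can run this notebook in Google Colab by opening this link. The upload step relies on the `google.colab` module, so the main code cell must be executed in Colab.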

🎯 Features & Usage

✅ Upload Audio: Users can upload a .wav file for analysis.
✅ Waveform Visualization: Displays the time-domain representation of the audio signal.
✅ Spectrum Analysis (FFT): Computes and plots the frequency spectrum using Fast Fourier Transform (FFT).
✅ Spectrogram: Generates a time-frequency representation, showing how frequencies evolve over time.
✅ Third-Octave Band Analysis: Visualizes sound levels in standard third-octave bands, useful in acoustics and environmental noise analysis.

🔹 How to Use

First, install the required packages by running the code cell below; this takes less than 20 seconds.
Then run the main code cell that follows it.

Installation of packages

pip install sounddevice numpy matplotlib scipy ipywidgets soundfile

Main code

Click “Upload Audio” and select a .wav file. Use the buttons to analyze:

📈 Waveform
🎵 Spectrum
🔊 Spectrogram
🎼 Third-Octave Band SPL

import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fft
from scipy.signal import spectrogram
import ipywidgets as widgets
from IPython.display import display, clear_output
import soundfile as sf
from google.colab import files

# Shared state: both names stay None until upload_audio() loads a file
audio = None  # Sample data returned by sf.read
fs = None  # Sampling frequency in Hz returned by sf.read

# One Output widget per plot, so each figure renders in its own area
output_waveform, output_spectrum, output_spectrogram, output_third_octave = (
    widgets.Output() for _ in range(4)
)

# Function to upload and load audio file
def upload_audio():
    """Prompt for a file upload and load it as the current audio.

    On success the module-level ``audio`` (sample array) and ``fs`` (sample
    rate in Hz) are replaced with the contents of the first uploaded file.
    If no file is uploaded, or the file cannot be decoded, a message is
    printed and the previously loaded audio is left untouched.
    """
    global audio, fs
    uploaded = files.upload()
    if not uploaded:
        # An empty mapping means nothing was selected; indexing it would raise.
        print("No file uploaded.")
        return
    filename = next(iter(uploaded))  # Only the first uploaded file is used
    try:
        data, rate = sf.read(filename)  # Read audio file
    except RuntimeError as err:
        # soundfile's decoding errors derive from RuntimeError.
        print(f"Could not read {filename}: {err}")
        return
    audio, fs = data, rate
    print(f"Loaded {filename} with sample rate {fs} Hz")

# Function to plot the waveform
def plot_waveform():
    """Draw the loaded audio against time in the waveform output area.

    Reads the module-level ``audio`` and ``fs``. When no audio has been
    loaded, a notice is printed in the output area instead of a plot.
    """
    global audio, fs
    if audio is None:
        with output_waveform:
            clear_output(wait=True)
            print("No audio loaded yet.")
        return
    with output_waveform:
        clear_output(wait=True)
        # Sample k is taken at k / fs seconds, so the last sample sits at
        # (N - 1) / fs; a linspace ending at N / fs would stretch the axis.
        time = np.arange(len(audio)) / fs
        plt.figure(figsize=(10, 4))
        plt.plot(time, audio, label="Audio Signal")
        plt.title("Audio Waveform")
        plt.xlabel("Time (s)")
        plt.ylabel("Amplitude")
        plt.grid()
        plt.legend()
        plt.show()

# Function to plot the spectrum
def plot_spectrum():
    """Plot the one-sided FFT magnitude spectrum of the loaded audio.

    Reads the module-level ``audio`` and ``fs``. Multi-channel audio is
    averaged across channels before the transform. When no audio has been
    loaded, a notice is printed in the output area instead of a plot.
    """
    global audio, fs
    if audio is None:
        with output_spectrum:
            clear_output(wait=True)
            print("No audio loaded yet.")
        return
    with output_spectrum:
        clear_output(wait=True)
        # Flattening a (frames, channels) array would interleave the channels
        # and corrupt the spectrum, so mix down to a single channel instead.
        samples = audio if audio.ndim == 1 else audio.mean(axis=1)
        N = len(samples)
        half = N // 2
        audio_fft = fft(samples)
        # FFT bin k corresponds to k * fs / N Hz.
        frequencies = np.arange(half) * fs / N
        # Doubling folds the negative-frequency half into the one-sided view.
        magnitude = np.abs(audio_fft[:half]) * 2 / N
        # The DC bin has no negative-frequency twin, so undo its doubling
        # (slice form is a no-op when the spectrum is empty).
        magnitude[:1] /= 2
        plt.figure(figsize=(10, 4))
        plt.plot(frequencies, magnitude, label="Spectrum")
        plt.title("Spectrum of the Audio File")
        plt.xlabel("Frequency (Hz)")
        plt.ylabel("Magnitude")
        plt.grid()
        plt.legend()
        plt.show()

# Function to plot the spectrogram
def plot_spectrogram():
    """Plot a spectrogram (in dB) of the loaded audio.

    Reads the module-level ``audio`` and ``fs``. Multi-channel audio is
    averaged across channels first. Segments are 1024 samples long with 50%
    overlap, shortened for clips with fewer than 1024 samples. When no audio
    has been loaded, a notice is printed in the output area instead of a plot.
    """
    global audio, fs
    if audio is None:
        with output_spectrogram:
            clear_output(wait=True)
            print("No audio loaded yet.")
        return
    with output_spectrogram:
        clear_output(wait=True)
        # Flattening a (frames, channels) array would interleave the channels,
        # so mix down to a single channel instead.
        samples = audio if audio.ndim == 1 else audio.mean(axis=1)
        # Keep noverlap below nperseg even for very short clips.
        nperseg = min(1024, len(samples))
        f_spec, t_spec, Sxx = spectrogram(
            samples, fs, nperseg=nperseg, noverlap=nperseg // 2
        )
        # Digital silence gives exact zeros; clamp them to the smallest
        # non-zero value so log10 never produces -inf.
        positive = Sxx[Sxx > 0]
        floor = positive.min() if positive.size else np.finfo(float).tiny
        level_db = 10 * np.log10(np.maximum(Sxx, floor))
        plt.figure(figsize=(10, 6))
        plt.pcolormesh(t_spec, f_spec, level_db, shading="gouraud")
        plt.ylabel("Frequency (Hz)", fontsize=14)
        plt.xlabel("Time (s)", fontsize=14)
        plt.title("Spectrogram", fontsize=16)
        plt.colorbar(label="Amplitude (dB)")
        plt.show()

# Function to plot the third-octave band SPL
def plot_third_octave():
    """Plot band levels of the loaded audio in third-octave bands.

    Reads the module-level ``audio`` and ``fs``. Multi-channel audio is
    averaged across channels first. Only FFT bins up to the Nyquist frequency
    are used. Bands that contain no bins, or no energy, are left blank in
    the chart. Levels are in dB with no reference pressure applied, so they
    are relative rather than calibrated. When no audio has been loaded, a
    notice is printed in the output area instead of a plot.
    """
    global audio, fs
    if audio is None:
        with output_third_octave:
            clear_output(wait=True)
            print("No audio loaded yet.")
        return
    with output_third_octave:
        clear_output(wait=True)
        # Flattening a (frames, channels) array would interleave the channels,
        # so mix down to a single channel instead.
        y = audio if audio.ndim == 1 else audio.mean(axis=1)
        n_samples = len(y)
        slength = n_samples / fs
        dt = 1.0 / fs
        df = 1.0 / slength

        # Keep only bins from 0 Hz to Nyquist: the upper half of the FFT is
        # the mirrored negative-frequency content and must not be summed into
        # bands again.
        n_bins = n_samples // 2 + 1
        Y = fft(y)[:n_bins] * dt
        power = (np.abs(Y) / slength) ** 2
        f = np.arange(n_bins) * df  # bin k sits at k * fs / N Hz

        # Define third-octave band centers
        fcenter = np.array(
            [
                20, 25, 31, 40, 50, 63, 80, 100, 125, 160, 200, 250, 315, 400, 500,
                630, 800, 1000, 1250, 1600, 2000, 2500, 3150, 4000, 5000, 6300,
                8000, 10000, 12500, 16000, 20000,
            ]
        )
        # Consecutive lower edges delimit the bands, so the last center only
        # supplies the upper edge of the band before it.
        flower = np.round(2 ** (-1 / 6) * fcenter)

        # Third-octave analysis: one slot per band, so the result always lines
        # up with the bar positions even when a band has no bins.
        Lp_third = np.full(len(flower) - 1, np.nan)
        for i, (f1, f2) in enumerate(zip(flower[:-1], flower[1:])):
            band_power = df * np.sum(power[(f >= f1) & (f < f2)])
            if band_power > 0:
                Lp_third[i] = 10 * np.log10(band_power)

        # Plot third-octave spectrum
        plt.figure(figsize=(10, 4))
        plt.bar(flower[:-1], Lp_third, width=np.diff(flower), align="edge", color="k")
        plt.xlabel("Frequency (Hz)", fontsize=14)
        plt.ylabel("L_p (dB)", fontsize=14)
        plt.title("Third-Octave Spectrum", fontsize=16)
        plt.grid(True)
        plt.show()

# Create widgets: one upload button plus one button per plot type
def _plot_button(caption):
    """Return a "primary"-styled button showing the given caption."""
    return widgets.Button(description=caption, button_style="primary")


upload_button = widgets.Button(button_style="success", description="Upload Audio")
waveform_button = _plot_button("Plot Waveform")
spectrum_button = _plot_button("Plot Spectrum")
spectrogram_button = _plot_button("Plot Spectrogram")
third_octave_button = _plot_button("Plot Third-Octave")

# Attach actions to buttons
# Each handler accepts the argument passed by the click event (unused here)
# and forwards to the matching function.
def on_upload_button_clicked(b):
    """Handle a click on the upload button by calling upload_audio()."""
    upload_audio()

def on_waveform_button_clicked(b):
    """Handle a click on the waveform button by calling plot_waveform()."""
    plot_waveform()

def on_spectrum_button_clicked(b):
    """Handle a click on the spectrum button by calling plot_spectrum()."""
    plot_spectrum()

def on_spectrogram_button_clicked(b):
    """Handle a click on the spectrogram button by calling plot_spectrogram()."""
    plot_spectrogram()

def on_third_octave_button_clicked(b):
    """Handle a click on the third-octave button by calling plot_third_octave()."""
    plot_third_octave()

# Register each click handler on its button
for _button, _handler in (
    (upload_button, on_upload_button_clicked),
    (waveform_button, on_waveform_button_clicked),
    (spectrum_button, on_spectrum_button_clicked),
    (spectrogram_button, on_spectrogram_button_clicked),
    (third_octave_button, on_third_octave_button_clicked),
):
    _button.on_click(_handler)

# Lay out the controls: upload button on top, plot buttons in two rows below
_time_and_spectrum_row = widgets.HBox([waveform_button, spectrum_button])
_spectrogram_and_bands_row = widgets.HBox([spectrogram_button, third_octave_button])
controls = widgets.VBox(
    [upload_button, _time_and_spectrum_row, _spectrogram_and_bands_row]
)
display(controls)

# Show the output areas beneath the controls
display(output_waveform, output_spectrum, output_spectrogram, output_third_octave)