2.4. Audio analysis toolkit#
Introduction#
This notebook provides an interactive tool for analyzing audio signals. It allows users to upload a pre-recorded audio file and explore its properties through various visualizations and spectral analysis techniques.
You can run this code through Google Colab by opening this link.
🎯 Features & Usage#
✅ Upload Audio: Users can upload a .wav file for analysis.
✅ Waveform Visualization: Displays the time-domain representation of the audio signal.
✅ Spectrum Analysis (FFT): Computes and plots the frequency spectrum using the Fast Fourier Transform (FFT).
✅ Spectrogram: Generates a time-frequency representation, showing how frequencies evolve over time.
✅ Third-Octave Band Analysis: Visualizes sound levels in standard third-octave bands, useful in acoustics and environmental noise analysis.
🔹 How to Use#
First install all needed packages by running the following code cell, which takes less than 20 seconds.
Then, run the main code cell below.
Installation of packages#
pip install sounddevice numpy matplotlib scipy ipywidgets soundfile
Main code#
Click “Upload Audio” and select a .wav file. Use the buttons to analyze:
📈 Waveform
🎵 Spectrum
📊 Spectrogram
🎼 Third-Octave Band SPL
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fft
from scipy.signal import spectrogram
import ipywidgets as widgets
from IPython.display import display, clear_output
import soundfile as sf
from google.colab import files
# Shared state, filled in by upload_audio() once a file has been loaded
audio = None  # Samples of the loaded recording
fs = None  # Sampling frequency of the loaded recording

# One output area per plot type, so each plot can be redrawn on its own
(
    output_waveform,
    output_spectrum,
    output_spectrogram,
    output_third_octave,
) = (widgets.Output() for _ in range(4))
# Function to upload and load audio file
def upload_audio():
    """Prompt for a file upload and load it as the current audio.

    On success the module-level ``audio`` (sample array) and ``fs``
    (sample rate) are replaced with the contents of the uploaded file.
    If the upload returns no file (for example because the dialog was
    dismissed), a notice is printed and any previously loaded audio is
    left untouched.
    """
    global audio, fs
    uploaded = files.upload()
    if not uploaded:
        # An empty result has no first key; indexing it would raise IndexError.
        print("No file was uploaded.")
        return
    filename = next(iter(uploaded))  # Name of the first uploaded file
    audio, fs = sf.read(filename)  # Read audio file
    print(f"Loaded {filename} with sample rate {fs} Hz")
# Function to plot the waveform
def plot_waveform():
    """Draw the loaded audio against time in the waveform output area.

    Reads the module-level ``audio`` and ``fs``. When no audio has been
    loaded yet, a short notice is printed in the output area instead.
    """
    with output_waveform:
        clear_output(wait=True)
        if audio is None:
            print("No audio loaded yet.")
            return

        # Sample k is taken at k / fs, so the last sample sits at (N - 1) / fs.
        # np.linspace(0, N / fs, N) would include the end point and stretch
        # the whole axis by one sample period.
        time = np.arange(len(audio)) / fs

        plt.figure(figsize=(10, 4))
        plt.plot(time, audio, label="Audio Signal")
        plt.title("Audio Waveform")
        plt.xlabel("Time (s)")
        plt.ylabel("Amplitude")
        plt.grid()
        plt.legend()
        plt.show()
# Function to plot the spectrum
def plot_spectrum():
    """Plot the single-sided amplitude spectrum of the loaded audio.

    Reads the module-level ``audio`` and ``fs``. Multi-channel audio is
    averaged down to one channel before the FFT. When no audio has been
    loaded yet, a short notice is printed in the output area instead.
    """
    with output_spectrum:
        clear_output(wait=True)
        if audio is None:
            print("No audio loaded yet.")
            return

        samples = np.asarray(audio, dtype=float)
        if samples.ndim > 1:
            # Flattening a (frames, channels) array would interleave the
            # channels into one bogus signal; mix down to mono instead.
            samples = samples.mean(axis=1)

        N = len(samples)
        half = N // 2
        audio_fft = fft(samples)

        # FFT bin k corresponds to k * fs / N, so the last retained bin lies
        # just below fs / 2 rather than exactly on it.
        frequencies = np.arange(half) * fs / N
        magnitude = np.abs(audio_fft[:half]) * 2 / N
        # The factor 2 folds in the mirrored negative-frequency bin; the DC
        # bin has no mirror image and must not be doubled.
        magnitude[:1] /= 2

        plt.figure(figsize=(10, 4))
        plt.plot(frequencies, magnitude, label="Spectrum")
        plt.title("Spectrum of the Audio File")
        plt.xlabel("Frequency (Hz)")
        plt.ylabel("Magnitude")
        plt.grid()
        plt.legend()
        plt.show()
# Function to plot the spectrogram
def plot_spectrogram():
    """Plot a dB-scaled spectrogram of the loaded audio.

    Reads the module-level ``audio`` and ``fs``. Multi-channel audio is
    averaged down to one channel first. Segments are 1024 samples long with
    50 % overlap; for clips shorter than 1024 samples the segment length is
    reduced to the clip length. When no audio has been loaded yet, a short
    notice is printed in the output area instead.
    """
    with output_spectrogram:
        clear_output(wait=True)
        if audio is None:
            print("No audio loaded yet.")
            return

        samples = np.asarray(audio, dtype=float)
        if samples.ndim > 1:
            # Flattening a (frames, channels) array would interleave the
            # channels into one bogus signal; mix down to mono instead.
            samples = samples.mean(axis=1)

        # Keep noverlap strictly below nperseg even for very short clips;
        # a fixed noverlap of 512 is rejected once nperseg drops to 512 or less.
        nperseg = min(1024, len(samples))
        noverlap = nperseg // 2
        f_spec, t_spec, Sxx = spectrogram(
            samples, fs, nperseg=nperseg, noverlap=noverlap
        )

        # Exact zeros (digital silence) would turn into -inf under log10.
        # Clamp them to the smallest non-zero power so finite cells keep
        # their values and the colour scale is unaffected.
        nonzero = Sxx[Sxx > 0]
        floor = nonzero.min() if nonzero.size else np.finfo(float).tiny
        level_db = 10 * np.log10(np.maximum(Sxx, floor))

        plt.figure(figsize=(10, 6))
        plt.pcolormesh(t_spec, f_spec, level_db, shading="gouraud")
        plt.ylabel("Frequency (Hz)", fontsize=14)
        plt.xlabel("Time (s)", fontsize=14)
        plt.title("Spectrogram", fontsize=16)
        plt.colorbar(label="Amplitude (dB)")
        plt.show()
# Function to plot the third-octave band SPL
def plot_third_octave():
    """Plot the level of the loaded audio in third-octave bands.

    Reads the module-level ``audio`` and ``fs``. Multi-channel audio is
    averaged down to one channel first. Each band level is
    ``10 * log10(df * sum((|Y| / T) ** 2))`` over the FFT bins inside the
    band, where ``Y`` is the FFT scaled by the sample period, ``T`` the
    signal duration and ``df = 1 / T``. Bands that contain no bins, or no
    energy, are left empty in the plot. When no audio has been loaded yet,
    a short notice is printed in the output area instead.
    """
    with output_third_octave:
        clear_output(wait=True)
        if audio is None:
            print("No audio loaded yet.")
            return

        y = np.asarray(audio, dtype=float)
        if y.ndim > 1:
            # Flattening a (frames, channels) array would interleave the
            # channels into one bogus signal; mix down to mono instead.
            y = y.mean(axis=1)

        n_samples = len(y)
        slength = n_samples / fs
        dt = 1.0 / fs
        df = 1.0 / slength

        Y = fft(y) * dt
        # Work with power directly; converting to dB and back only adds a
        # log10(0) hazard for bins that are exactly zero.
        power = (np.abs(Y) / slength) ** 2
        # FFT bin k corresponds to k * df. Bins above fs / 2 are mirror
        # images of the lower half and must not be counted in any band.
        f = np.arange(n_samples) * df
        below_nyquist = f <= fs / 2
        f = f[below_nyquist]
        power = power[below_nyquist]

        # Define third-octave band centers
        fcenter = np.array(
            [
                20, 25, 31, 40, 50, 63, 80, 100, 125, 160, 200, 250, 315, 400, 500,
                630, 800, 1000, 1250, 1600, 2000, 2500, 3150, 4000, 5000, 6300,
                8000, 10000, 12500, 16000, 20000,
            ]
        )
        flower = np.round(2 ** (-1 / 6) * fcenter)
        fupper = np.round(2 ** (1 / 6) * fcenter)
        # Contiguous band edges: each band runs from its lower edge to the
        # next band's lower edge, and the top band is closed by its own
        # upper edge so that it is no longer dropped.
        edges = np.append(flower, fupper[-1])

        # Third-octave analysis
        Lp_third = []
        for f1, f2 in zip(edges[:-1], edges[1:]):
            band_power = df * power[(f >= f1) & (f < f2)].sum()
            # Keep one entry per band (NaN when empty) so the levels always
            # line up with the bar positions passed to plt.bar.
            Lp_third.append(10 * np.log10(band_power) if band_power > 0 else np.nan)

        # Plot third-octave spectrum
        plt.figure(figsize=(10, 4))
        plt.bar(edges[:-1], Lp_third, width=np.diff(edges), align="edge", color="k")
        plt.xlabel("Frequency (Hz)", fontsize=14)
        plt.ylabel("L_p (dB)", fontsize=14)
        plt.title("Third-Octave Spectrum", fontsize=16)
        plt.grid(True)
        plt.show()
# Build the buttons: one green upload button plus four blue plot buttons
upload_button = widgets.Button(description="Upload Audio", button_style="success")
(
    waveform_button,
    spectrum_button,
    spectrogram_button,
    third_octave_button,
) = (
    widgets.Button(description=caption, button_style="primary")
    for caption in (
        "Plot Waveform",
        "Plot Spectrum",
        "Plot Spectrogram",
        "Plot Third-Octave",
    )
)
# Click handlers: each receives the clicked button (unused) and delegates
def on_upload_button_clicked(b):
    """Run the upload routine when the upload button is clicked."""
    upload_audio()


def on_waveform_button_clicked(b):
    """Redraw the waveform plot when its button is clicked."""
    plot_waveform()


def on_spectrum_button_clicked(b):
    """Redraw the spectrum plot when its button is clicked."""
    plot_spectrum()


def on_spectrogram_button_clicked(b):
    """Redraw the spectrogram plot when its button is clicked."""
    plot_spectrogram()


def on_third_octave_button_clicked(b):
    """Redraw the third-octave plot when its button is clicked."""
    plot_third_octave()


# Register every handler on its button
for _button, _handler in (
    (upload_button, on_upload_button_clicked),
    (waveform_button, on_waveform_button_clicked),
    (spectrum_button, on_spectrum_button_clicked),
    (spectrogram_button, on_spectrogram_button_clicked),
    (third_octave_button, on_third_octave_button_clicked),
):
    _button.on_click(_handler)
# Lay out the controls (upload on top, plot buttons in two rows) and show
# them, followed by the four output areas
_first_plot_row = widgets.HBox([waveform_button, spectrum_button])
_second_plot_row = widgets.HBox([spectrogram_button, third_octave_button])
controls = widgets.VBox([upload_button, _first_plot_row, _second_plot_row])

display(controls)
display(
    output_waveform,
    output_spectrum,
    output_spectrogram,
    output_third_octave,
)