From 132dea15fa1c70b06da961c3cc4dffd8f6adb871 Mon Sep 17 00:00:00 2001 From: David Madl Date: Mon, 27 Apr 2026 11:06:39 +0200 Subject: [PATCH] fix: amplitude cutoff lowpass instead of mean --- rhythm.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/rhythm.py b/rhythm.py index f5cfa60..d010712 100644 --- a/rhythm.py +++ b/rhythm.py @@ -13,8 +13,8 @@ import numpy as np from numpy.fft import fft -from scipy.signal import fftconvolve -from scipy.io import wavfile + +from hsh_signal.signal import lowpass_fft def viterbi_highest_frequency_path_vectorized(Scp2, jump_penalty=2.0, use_log_amplitude=True): Scp2 = np.asarray(Scp2, dtype=float) @@ -103,14 +103,21 @@ class BassAnalyzer: wavelet_win_sec = 0.175 k_omega, k_nu = 0.12, 5.0 #: adapt scaling to get 'reasonable' frequency range (for pop bass, e.g. 18..1145 Hz, but that range strongly depends on the actual song's 'pt' shortest interval 'B') viterbi_jump_penalty = 5000.0 + Wp_force = None + I_force = None - def __init__(self, fs, sig): + def __init__(self, fs, sig, Wp_force=None): """ :param fs: sampling rate :param sig: audio signal normalized to [-1,1] """ self.D = int(self.shift_sec * fs) #: spectrogram step - self.Wp = int(np.round(self.wavelet_win_sec * fs / self.W) * self.W) # wavelet window - make it an integer multiple of FFT window + if self.Wp_force: + self.Wp = self.Wp_force + elif Wp_force: + self.Wp = Wp_force + else: + self.Wp = int(np.round(self.wavelet_win_sec * fs / self.W) * self.W) # wavelet window - make it an integer multiple of FFT window self.U = self.Wp // self.W # ratio self.f = np.pad(sig, (self.W//2, self.W//2-1)) #: signal padded (W-FFT to determine scalogram parameters) @@ -162,7 +169,10 @@ class BassAnalyzer: g = np.abs(Spf) # (M x W) g_bar = np.mean(g, axis=1) # (M) # TODO: check if 'A' needs to be a smooth signal slowly varying over time, not a const. - A = np.mean(g_bar) # amplitude cutoff for pulse train + #A = np.mean(g_bar) # amplitude cutoff for pulse train + ip = int(fs) + g_bar_l = lowpass_fft(np.pad(g_bar, (ip, ip), mode='edge'), fps=fs, cf=0.5, tw=0.05)[ip:-ip] + A = g_bar_l # compute transitions (pulse train) pt = (g_bar > A).astype(int) # pulse train @@ -220,7 +230,10 @@ class BassAnalyzer: T = (Lp - Wp) / fsp # un-padded sample count -> time length #omega = p * T / B # width parameter of Gabor wavelet #nu = B / (p * T) # frequency parameter of Gabor wavelet - I = int(np.log2(p**2 * T**2 / (delta * B**2)) - 3/2) # number of octaves + if self.I_force: + I = self.I_force + else: + I = int(np.log2(p**2 * T**2 / (delta * B**2)) - 3/2) # number of octaves J = int(256 / I) # number of voices per octave r = np.linspace(0, I*J-1, I*J)