fix: use a low-pass-filtered g_bar as the pulse-train amplitude cutoff instead of its global mean

This commit is contained in:
2026-04-27 11:06:39 +02:00
parent 3dc2e4d3d5
commit 132dea15fa

View File

@@ -13,8 +13,8 @@
import numpy as np import numpy as np
from numpy.fft import fft from numpy.fft import fft
from scipy.signal import fftconvolve
from scipy.io import wavfile from hsh_signal.signal import lowpass_fft
def viterbi_highest_frequency_path_vectorized(Scp2, jump_penalty=2.0, use_log_amplitude=True): def viterbi_highest_frequency_path_vectorized(Scp2, jump_penalty=2.0, use_log_amplitude=True):
Scp2 = np.asarray(Scp2, dtype=float) Scp2 = np.asarray(Scp2, dtype=float)
@@ -103,14 +103,21 @@ class BassAnalyzer:
wavelet_win_sec = 0.175 wavelet_win_sec = 0.175
k_omega, k_nu = 0.12, 5.0 #: adapt scaling to get 'reasonable' frequency range (for pop bass, e.g. 18..1145 Hz, but that range strongly depends on the actual song's 'pt' shortest interval 'B') k_omega, k_nu = 0.12, 5.0 #: adapt scaling to get 'reasonable' frequency range (for pop bass, e.g. 18..1145 Hz, but that range strongly depends on the actual song's 'pt' shortest interval 'B')
viterbi_jump_penalty = 5000.0 viterbi_jump_penalty = 5000.0
Wp_force = None
I_force = None
def __init__(self, fs, sig): def __init__(self, fs, sig, Wp_force=None):
""" """
:param fs: sampling rate :param fs: sampling rate
:param sig: audio signal normalized to [-1,1] :param sig: audio signal normalized to [-1,1]
""" """
self.D = int(self.shift_sec * fs) #: spectrogram step self.D = int(self.shift_sec * fs) #: spectrogram step
self.Wp = int(np.round(self.wavelet_win_sec * fs / self.W) * self.W) # wavelet window - make it an integer multiple of FFT window if self.Wp_force:
self.Wp = self.Wp_force
elif Wp_force:
self.Wp = Wp_force
else:
self.Wp = int(np.round(self.wavelet_win_sec * fs / self.W) * self.W) # wavelet window - make it an integer multiple of FFT window
self.U = self.Wp // self.W # ratio self.U = self.Wp // self.W # ratio
self.f = np.pad(sig, (self.W//2, self.W//2-1)) #: signal padded (W-FFT to determine scalogram parameters) self.f = np.pad(sig, (self.W//2, self.W//2-1)) #: signal padded (W-FFT to determine scalogram parameters)
@@ -162,7 +169,10 @@ class BassAnalyzer:
g = np.abs(Spf) # (M x W) g = np.abs(Spf) # (M x W)
g_bar = np.mean(g, axis=1) # (M) g_bar = np.mean(g, axis=1) # (M)
# TODO: check if 'A' needs to be a smooth signal slowly varying over time, not a const. # TODO: check if 'A' needs to be a smooth signal slowly varying over time, not a const.
A = np.mean(g_bar) # amplitude cutoff for pulse train #A = np.mean(g_bar) # amplitude cutoff for pulse train
ip = int(fs)
g_bar_l = lowpass_fft(np.pad(g_bar, (ip, ip), mode='edge'), fps=fs, cf=0.5, tw=0.05)[ip:-ip]
A = g_bar_l
# compute transitions (pulse train) # compute transitions (pulse train)
pt = (g_bar > A).astype(int) # pulse train pt = (g_bar > A).astype(int) # pulse train
@@ -220,7 +230,10 @@ class BassAnalyzer:
T = (Lp - Wp) / fsp # un-padded sample count -> time length T = (Lp - Wp) / fsp # un-padded sample count -> time length
#omega = p * T / B # width parameter of Gabor wavelet #omega = p * T / B # width parameter of Gabor wavelet
#nu = B / (p * T) # frequency parameter of Gabor wavelet #nu = B / (p * T) # frequency parameter of Gabor wavelet
I = int(np.log2(p**2 * T**2 / (delta * B**2)) - 3/2) # number of octaves if self.I_force:
I = self.I_force
else:
I = int(np.log2(p**2 * T**2 / (delta * B**2)) - 3/2) # number of octaves
J = int(256 / I) # number of voices per octave J = int(256 / I) # number of voices per octave
r = np.linspace(0, I*J-1, I*J) r = np.linspace(0, I*J-1, I*J)