Remove the Silent classification from BinClassifier. Not only is it not very useful, it is also misaligned with the other classifications because it doesn't account for lag, and so it can make those other classifications wrong.

This commit is contained in:
Chris Cannam
2022-06-14 13:59:17 +01:00
parent 33a2696b34
commit 638948269b
8 changed files with 215 additions and 28 deletions

View File

@@ -39,8 +39,7 @@ public:
enum class Classification {
Harmonic = 0,
Percussive = 1,
Residual = 2,
Silent = 3
Residual = 2
};
struct Parameters {
@@ -50,18 +49,15 @@ public:
int verticalFilterLength;
double harmonicThreshold;
double percussiveThreshold;
double silenceThreshold;
Parameters(int _binCount, int _horizontalFilterLength,
int _horizontalFilterLag, int _verticalFilterLength,
double _harmonicThreshold, double _percussiveThreshold,
double _silenceThreshold) :
double _harmonicThreshold, double _percussiveThreshold) :
binCount(_binCount),
horizontalFilterLength(_horizontalFilterLength),
horizontalFilterLag(_horizontalFilterLag),
verticalFilterLength(_verticalFilterLength),
harmonicThreshold(_harmonicThreshold),
percussiveThreshold(_percussiveThreshold),
silenceThreshold(_silenceThreshold) { }
percussiveThreshold(_percussiveThreshold) { }
};
BinClassifier(Parameters parameters) :
@@ -118,13 +114,11 @@ public:
}
double eps = 1.0e-7;
for (int i = 0; i < n; ++i) {
Classification c;
if (mag[i] < m_parameters.silenceThreshold) {
c = Classification::Silent;
} else if (double(m_hf[i]) / (double(m_vf[i]) + eps) >
m_parameters.harmonicThreshold) {
if (double(m_hf[i]) / (double(m_vf[i]) + eps) >
m_parameters.harmonicThreshold) {
c = Classification::Harmonic;
} else if (double(m_vf[i]) / (double(m_hf[i]) + eps) >
m_parameters.percussiveThreshold) {

View File

@@ -50,14 +50,17 @@ public:
int fftSize;
int binCount;
double sampleRate;
Parameters(int _fftSize, int _binCount, double _sampleRate) :
fftSize(_fftSize), binCount(_binCount), sampleRate(_sampleRate) { }
int classFilterLength;
Parameters(int _fftSize, int _binCount, double _sampleRate,
int _classFilterLength) :
fftSize(_fftSize), binCount(_binCount), sampleRate(_sampleRate),
classFilterLength(_classFilterLength) { }
};
BinSegmenter(Parameters parameters) :
m_parameters(parameters),
m_numeric(m_parameters.binCount, 0),
m_classFilter(3, 18)
m_classFilter(3, m_parameters.classFilterLength)
{
}
@@ -78,19 +81,19 @@ public:
std::cout << "c:";
for (int i = 0; i < n; ++i) {
if (i > 0) std::cout << ",";
if (m_numeric[i] == 1) {
std::cout << "1";
} else {
std::cout << "0";
}
std::cout << m_numeric[i];
}
std::cout << std::endl;
*/
double f0 = 0.0;
for (int i = 1; i < n; ++i) {
if (m_numeric[i] != 1) {
f0 = frequencyForBin
(i, m_parameters.fftSize, m_parameters.sampleRate);
if (m_numeric[i] != 1) { // percussive
if (i == 1 && m_numeric[0] != 1) { // percussive
f0 = 0.0;
} else {
f0 = frequencyForBin
(i, m_parameters.fftSize, m_parameters.sampleRate);
}
break;
}
}
@@ -101,7 +104,7 @@ public:
for (int i = n - 1; i > 0; --i) {
int c = m_numeric[i];
if (!inPercussive) {
if (c == 2) { // residual/silent
if (c == 2) { // residual
continue;
} else if (c == 1) { // percussive
inPercussive = true;

View File

@@ -58,10 +58,10 @@ R3StretcherImpl::R3StretcherImpl(Parameters parameters,
BinSegmenter::Parameters segmenterParameters
(m_guideConfiguration.classificationFftSize,
classificationBins, m_parameters.sampleRate);
classificationBins, m_parameters.sampleRate, 18);
BinClassifier::Parameters classifierParameters
(classificationBins, 9, 1, 10, 2.0, 2.0, 1.0e-7);
(classificationBins, 9, 1, 10, 2.0, 2.0);
int inRingBufferSize = m_guideConfiguration.longestFftSize * 2;
int outRingBufferSize = m_guideConfiguration.longestFftSize * 16;

View File

@@ -193,9 +193,9 @@ protected:
haveReadahead(false),
classifier(new BinClassifier(classifierParameters)),
classification(classifierParameters.binCount,
BinClassifier::Classification::Silent),
BinClassifier::Classification::Residual),
nextClassification(classifierParameters.binCount,
BinClassifier::Classification::Silent),
BinClassifier::Classification::Residual),
segmenter(new BinSegmenter(segmenterParameters)),
segmentation(), prevSegmentation(), nextSegmentation(),
mixdown(longestFftSize, 0.f), // though it could be shorter