Remove the Silent classification from BinClassifier: not only is it not very useful, it is also misaligned with the other classifications, because it is decided from the current input magnitudes without accounting for the filter lag, and so it can make the other (lagged) classifications wrong

This commit is contained in:
Chris Cannam
2022-06-14 13:59:17 +01:00
parent 33a2696b34
commit 638948269b
8 changed files with 215 additions and 28 deletions

View File

@@ -16,6 +16,7 @@ build_script:
- call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
- meson build "-Dextra_include_dirs=C:\Program Files\libsndfile\include" "-Dextra_lib_dirs=C:\Program Files\libsndfile\lib"
- ninja -C build
- meson test -C build
# Test the VC++ static library build, which is separate
- msbuild otherbuilds\rubberband-library.vcxproj /t:Build /p:Configuration=Release
# And test the .NET FFI interface build, which is again separate

View File

@@ -21,6 +21,8 @@ jobs:
run: ninja -C build_macos
- name: make ios
run: ninja -C build_ios
- name: unit test macos
run: meson test -C build_macos
- name: check otherbuilds
run: otherbuilds/check.sh

View File

@@ -90,6 +90,7 @@ unit_test_sources = [
'src/test/TestVectorOpsComplex.cpp',
'src/test/TestVectorOps.cpp',
'src/test/TestSignalBits.cpp',
'src/test/TestBinClassifier.cpp',
'src/test/test.cpp',
]

View File

@@ -39,8 +39,7 @@ public:
enum class Classification {
Harmonic = 0,
Percussive = 1,
Residual = 2,
Silent = 3
Residual = 2
};
struct Parameters {
@@ -50,18 +49,15 @@ public:
int verticalFilterLength;
double harmonicThreshold;
double percussiveThreshold;
double silenceThreshold;
Parameters(int _binCount, int _horizontalFilterLength,
int _horizontalFilterLag, int _verticalFilterLength,
double _harmonicThreshold, double _percussiveThreshold,
double _silenceThreshold) :
double _harmonicThreshold, double _percussiveThreshold) :
binCount(_binCount),
horizontalFilterLength(_horizontalFilterLength),
horizontalFilterLag(_horizontalFilterLag),
verticalFilterLength(_verticalFilterLength),
harmonicThreshold(_harmonicThreshold),
percussiveThreshold(_percussiveThreshold),
silenceThreshold(_silenceThreshold) { }
percussiveThreshold(_percussiveThreshold) { }
};
BinClassifier(Parameters parameters) :
@@ -121,9 +117,7 @@ public:
for (int i = 0; i < n; ++i) {
Classification c;
if (mag[i] < m_parameters.silenceThreshold) {
c = Classification::Silent;
} else if (double(m_hf[i]) / (double(m_vf[i]) + eps) >
if (double(m_hf[i]) / (double(m_vf[i]) + eps) >
m_parameters.harmonicThreshold) {
c = Classification::Harmonic;
} else if (double(m_vf[i]) / (double(m_hf[i]) + eps) >

View File

@@ -50,14 +50,17 @@ public:
int fftSize;
int binCount;
double sampleRate;
Parameters(int _fftSize, int _binCount, double _sampleRate) :
fftSize(_fftSize), binCount(_binCount), sampleRate(_sampleRate) { }
int classFilterLength;
Parameters(int _fftSize, int _binCount, double _sampleRate,
int _classFilterLength) :
fftSize(_fftSize), binCount(_binCount), sampleRate(_sampleRate),
classFilterLength(_classFilterLength) { }
};
BinSegmenter(Parameters parameters) :
m_parameters(parameters),
m_numeric(m_parameters.binCount, 0),
m_classFilter(3, 18)
m_classFilter(3, m_parameters.classFilterLength)
{
}
@@ -78,19 +81,19 @@ public:
std::cout << "c:";
for (int i = 0; i < n; ++i) {
if (i > 0) std::cout << ",";
if (m_numeric[i] == 1) {
std::cout << "1";
} else {
std::cout << "0";
}
std::cout << m_numeric[i];
}
std::cout << std::endl;
*/
double f0 = 0.0;
for (int i = 1; i < n; ++i) {
if (m_numeric[i] != 1) {
if (m_numeric[i] != 1) { // percussive
if (i == 1 && m_numeric[0] != 1) { // percussive
f0 = 0.0;
} else {
f0 = frequencyForBin
(i, m_parameters.fftSize, m_parameters.sampleRate);
}
break;
}
}
@@ -101,7 +104,7 @@ public:
for (int i = n - 1; i > 0; --i) {
int c = m_numeric[i];
if (!inPercussive) {
if (c == 2) { // residual/silent
if (c == 2) { // residual
continue;
} else if (c == 1) { // percussive
inPercussive = true;

View File

@@ -58,10 +58,10 @@ R3StretcherImpl::R3StretcherImpl(Parameters parameters,
BinSegmenter::Parameters segmenterParameters
(m_guideConfiguration.classificationFftSize,
classificationBins, m_parameters.sampleRate);
classificationBins, m_parameters.sampleRate, 18);
BinClassifier::Parameters classifierParameters
(classificationBins, 9, 1, 10, 2.0, 2.0, 1.0e-7);
(classificationBins, 9, 1, 10, 2.0, 2.0);
int inRingBufferSize = m_guideConfiguration.longestFftSize * 2;
int outRingBufferSize = m_guideConfiguration.longestFftSize * 16;

View File

@@ -193,9 +193,9 @@ protected:
haveReadahead(false),
classifier(new BinClassifier(classifierParameters)),
classification(classifierParameters.binCount,
BinClassifier::Classification::Silent),
BinClassifier::Classification::Residual),
nextClassification(classifierParameters.binCount,
BinClassifier::Classification::Silent),
BinClassifier::Classification::Residual),
segmenter(new BinSegmenter(segmenterParameters)),
segmentation(), prevSegmentation(), nextSegmentation(),
mixdown(longestFftSize, 0.f), // though it could be shorter

View File

@@ -0,0 +1,186 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2022 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
// This test suite (shallowly) tests both BinClassifier and BinSegmenter
#include "../finer/BinClassifier.h"
#include "../finer/BinSegmenter.h"
using namespace RubberBand;
using namespace std;
namespace tt = boost::test_tools;
// Single-character aliases for the three classification values, so
// that the tables of expected results in the tests below can be
// written (and read) as compact grids.
//
// We use the symbols H, X, and _ for harmonic, percussive, and
// residual respectively, because they are easier to distinguish than
// H, P, R
//
// NOTE(review): identifiers that begin with an underscore are
// reserved to the implementation when declared in the global
// namespace, which appears to be the case for `_` here -- consider
// moving these into an unnamed namespace. TODO confirm.
static constexpr auto H = BinClassifier::Classification::Harmonic;
static constexpr auto X = BinClassifier::Classification::Percussive;
static constexpr auto _ = BinClassifier::Classification::Residual;
// Convert a sequence of classifications into a parallel sequence of
// one-character strings: "H" for harmonic, "X" for percussive and "_"
// for residual. Any value that is not one of those three is rendered
// as "*". The result has the same length and ordering as the input.
//
// The tests compare these string vectors rather than the enum values
// themselves.
vector<string> classes_to_strings(const vector<BinClassifier::Classification> &v)
{
    vector<string> sv;
    sv.reserve(v.size());
    // Iterating by value avoids the signed/unsigned comparison that an
    // int index against v.size() would involve
    for (const auto c : v) {
        switch (c) {
        case H: sv.push_back("H"); break;
        case X: sv.push_back("X"); break;
        case _: sv.push_back("_"); break;
        default: sv.push_back("*"); break; // unrecognised value
        }
    }
    return sv;
}
BOOST_AUTO_TEST_SUITE(TestBinClassifier)
BOOST_AUTO_TEST_CASE(classify_bins)
{
    // Feed a short sequence of magnitude columns through a
    // BinClassifier and check the classification produced for each.

    const int columnCount = 7; // successive inputs (one per row below)
    const int binCount = 6;    // bins in each input

    vector<vector<double>> magColumns {
        { 0, 8, 1, 1, 0, 1 },
        { 0, 8, 0, 0, 0, 0 },
        { 8, 8, 8, 8, 8, 0 },
        { 0, 7, 0, 1, 0, 0 },
        { 0, 6, 0, 0, 0, 0 },
        { 0, 8, 0, 9, 9, 9 },
        { 0, 7, 0, 0, 1, 0 }
    };

    // One output buffer per input column, each initially all-residual
    vector<vector<BinClassifier::Classification>> classifications
        (columnCount,
         vector<BinClassifier::Classification>(binCount, _));

    // Arguments: bin count, horizontal filter length, horizontal
    // filter lag, vertical filter length, harmonic threshold,
    // percussive threshold
    BinClassifier::Parameters params(binCount, 3, /* lag */ 1, 3, 2.0, 2.0);
    BinClassifier classifier(params);

    for (int col = 0; col < columnCount; ++col) {
        classifier.classify(magColumns[col].data(),
                            classifications[col].data());
    }

    /*
      Each input column is written as a row in the tables here.

      Because the horizontal filter was given a lag of 1, the results
      are delayed by one column (here row); the vertical filter
      outputs are aligned with the middle of the length-3 horizontal
      filters rather than with their end.

      The horizontal filter outputs (which, as laid out here, means
      filtering vertically) are therefore

      0 8 1 1 0 1 <- This is the "lag" column that is not meaningful
      0 8 0 0 0 0 <- This is the actual median for the first col (row)
      0 8 1 1 0 0
      0 8 0 1 0 0
      0 7 0 1 0 0
      0 7 0 1 0 0
      0 7 0 0 1 0

      and the vertical filter outputs (lagged by one column so as to
      match the horizontal filter outputs) are

      0 0 0 0 0 0 <- The "lag" column (here row)
      0 1 1 1 1 0 <- The effective first column (row)
      0 0 0 0 0 0
      8 8 8 8 8 0
      0 0 1 0 0 0
      0 0 0 0 0 0
      0 0 8 9 9 9

      Bins are classified as harmonic, percussive, or residual.
      (Initially we detected silent bins too, but of course if done
      naively that doesn't align with the lagged filter output, and
      silent bins didn't appear relevant enough to take extra trouble
      over.) With this data, wherever the horizontal and vertical
      filter outputs are the same-ish (0, 1, or one of 7/8/9) we
      expect a residual classification. Otherwise we expect harmonic
      where the horizontal output is the greater, and percussive
      where the vertical output is.
    */

    vector<vector<BinClassifier::Classification>> expected {
        // These results are lagged by one relative to the input
        { _, H, H, H, _, H },
        { _, H, X, X, X, _ },
        { _, H, H, H, _, _ },
        { X, _, X, X, X, _ },
        { _, H, X, H, _, _ },
        { _, H, _, H, _, _ },
        { _, H, X, X, X, X }
    };

    // Compare column by column, element-wise, in printable form
    for (int i = 0; i < columnCount; ++i) {
        BOOST_TEST(classes_to_strings(classifications[i]) ==
                   classes_to_strings(expected[i]),
                   tt::per_element());
    }
}
BOOST_AUTO_TEST_CASE(segment_classification)
{
    // Feed a sequence of ready-made classifications through a
    // BinSegmenter and check the segmentation returned for each.

    const int frameCount = 7; // rows in the tables below

    vector<vector<BinClassifier::Classification>> classification {
        { _, H, X, X, X, _ },
        { _, H, H, H, _, _ },
        { X, _, X, X, X, _ },
        { _, H, X, H, _, _ },
        { X, X, _, H, _, _ },
        { _, H, X, X, X, X },
        { _, H, _, _, _, _ }
    };

    // Arguments: FFT size, bin count, sample rate, classification
    // filter length
    BinSegmenter::Parameters params(16, 6, 48000, 3);
    BinSegmenter segmenter(params);

    // Segment each frame in order, keeping every result
    vector<BinSegmenter::Segmentation> segmented;
    for (auto &frame : classification) {
        segmented.push_back(segmenter.segment(frame.data()));
    }

    /*
      The segmenter was given a modal filter of length 3, and equal
      counts are resolved using the ordering H, X, _. The filtered
      classifications will therefore be:

      H H X X X X
      H H H H _ _
      X X X X X X
      H H H H _ _
      X X H _ _ _
      H H X X X X
      H _ _ _ _ _
    */

    // Expected frequencies for each frame, in the order
    // percussiveBelow, percussiveAbove, residualAbove
    vector<BinSegmenter::Segmentation> expected {
        { 0.0, 3000.0, 15000.0 },
        { 0.0, 9000.0, 9000.0 }, // Though any equal values would do!
        { 0.0, 0.0, 15000.0 },
        { 0.0, 9000.0, 9000.0 },
        { 6000.0, 6000.0, 6000.0 }, // Similarly
        { 0.0, 3000.0, 15000.0 },
        { 0.0, 24000.0, 24000.0 }
    };

    for (int i = 0; i < frameCount; ++i) {
        BOOST_TEST(segmented[i].percussiveBelow == expected[i].percussiveBelow);
        BOOST_TEST(segmented[i].percussiveAbove == expected[i].percussiveAbove);
        BOOST_TEST(segmented[i].residualAbove == expected[i].residualAbove);
    }
}
BOOST_AUTO_TEST_SUITE_END()