Remove Silent classification from BinClassifier: not only is it not very useful, it's misaligned with the other classifications because it doesn't account for lag, and so it can make those wrong

This commit is contained in:
Chris Cannam
2022-06-14 13:59:17 +01:00
parent 33a2696b34
commit 638948269b
8 changed files with 215 additions and 28 deletions

View File

@@ -16,6 +16,7 @@ build_script:
- call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat" - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"
- meson build "-Dextra_include_dirs=C:\Program Files\libsndfile\include" "-Dextra_lib_dirs=C:\Program Files\libsndfile\lib" - meson build "-Dextra_include_dirs=C:\Program Files\libsndfile\include" "-Dextra_lib_dirs=C:\Program Files\libsndfile\lib"
- ninja -C build - ninja -C build
- meson test -C build
# Test the VC++ static library build, which is separate # Test the VC++ static library build, which is separate
- msbuild otherbuilds\rubberband-library.vcxproj /t:Build /p:Configuration=Release - msbuild otherbuilds\rubberband-library.vcxproj /t:Build /p:Configuration=Release
# And test the .NET FFI interface build, which is again separate # And test the .NET FFI interface build, which is again separate

View File

@@ -21,6 +21,8 @@ jobs:
run: ninja -C build_macos run: ninja -C build_macos
- name: make ios - name: make ios
run: ninja -C build_ios run: ninja -C build_ios
- name: unit test macos
run: meson test -C build_macos
- name: check otherbuilds - name: check otherbuilds
run: otherbuilds/check.sh run: otherbuilds/check.sh

View File

@@ -90,6 +90,7 @@ unit_test_sources = [
'src/test/TestVectorOpsComplex.cpp', 'src/test/TestVectorOpsComplex.cpp',
'src/test/TestVectorOps.cpp', 'src/test/TestVectorOps.cpp',
'src/test/TestSignalBits.cpp', 'src/test/TestSignalBits.cpp',
'src/test/TestBinClassifier.cpp',
'src/test/test.cpp', 'src/test/test.cpp',
] ]

View File

@@ -39,8 +39,7 @@ public:
enum class Classification { enum class Classification {
Harmonic = 0, Harmonic = 0,
Percussive = 1, Percussive = 1,
Residual = 2, Residual = 2
Silent = 3
}; };
struct Parameters { struct Parameters {
@@ -50,18 +49,15 @@ public:
int verticalFilterLength; int verticalFilterLength;
double harmonicThreshold; double harmonicThreshold;
double percussiveThreshold; double percussiveThreshold;
double silenceThreshold;
Parameters(int _binCount, int _horizontalFilterLength, Parameters(int _binCount, int _horizontalFilterLength,
int _horizontalFilterLag, int _verticalFilterLength, int _horizontalFilterLag, int _verticalFilterLength,
double _harmonicThreshold, double _percussiveThreshold, double _harmonicThreshold, double _percussiveThreshold) :
double _silenceThreshold) :
binCount(_binCount), binCount(_binCount),
horizontalFilterLength(_horizontalFilterLength), horizontalFilterLength(_horizontalFilterLength),
horizontalFilterLag(_horizontalFilterLag), horizontalFilterLag(_horizontalFilterLag),
verticalFilterLength(_verticalFilterLength), verticalFilterLength(_verticalFilterLength),
harmonicThreshold(_harmonicThreshold), harmonicThreshold(_harmonicThreshold),
percussiveThreshold(_percussiveThreshold), percussiveThreshold(_percussiveThreshold) { }
silenceThreshold(_silenceThreshold) { }
}; };
BinClassifier(Parameters parameters) : BinClassifier(Parameters parameters) :
@@ -121,9 +117,7 @@ public:
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
Classification c; Classification c;
if (mag[i] < m_parameters.silenceThreshold) { if (double(m_hf[i]) / (double(m_vf[i]) + eps) >
c = Classification::Silent;
} else if (double(m_hf[i]) / (double(m_vf[i]) + eps) >
m_parameters.harmonicThreshold) { m_parameters.harmonicThreshold) {
c = Classification::Harmonic; c = Classification::Harmonic;
} else if (double(m_vf[i]) / (double(m_hf[i]) + eps) > } else if (double(m_vf[i]) / (double(m_hf[i]) + eps) >

View File

@@ -50,14 +50,17 @@ public:
int fftSize; int fftSize;
int binCount; int binCount;
double sampleRate; double sampleRate;
Parameters(int _fftSize, int _binCount, double _sampleRate) : int classFilterLength;
fftSize(_fftSize), binCount(_binCount), sampleRate(_sampleRate) { } Parameters(int _fftSize, int _binCount, double _sampleRate,
int _classFilterLength) :
fftSize(_fftSize), binCount(_binCount), sampleRate(_sampleRate),
classFilterLength(_classFilterLength) { }
}; };
BinSegmenter(Parameters parameters) : BinSegmenter(Parameters parameters) :
m_parameters(parameters), m_parameters(parameters),
m_numeric(m_parameters.binCount, 0), m_numeric(m_parameters.binCount, 0),
m_classFilter(3, 18) m_classFilter(3, m_parameters.classFilterLength)
{ {
} }
@@ -78,19 +81,19 @@ public:
std::cout << "c:"; std::cout << "c:";
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
if (i > 0) std::cout << ","; if (i > 0) std::cout << ",";
if (m_numeric[i] == 1) { std::cout << m_numeric[i];
std::cout << "1";
} else {
std::cout << "0";
}
} }
std::cout << std::endl; std::cout << std::endl;
*/ */
double f0 = 0.0; double f0 = 0.0;
for (int i = 1; i < n; ++i) { for (int i = 1; i < n; ++i) {
if (m_numeric[i] != 1) { if (m_numeric[i] != 1) { // percussive
if (i == 1 && m_numeric[0] != 1) { // percussive
f0 = 0.0;
} else {
f0 = frequencyForBin f0 = frequencyForBin
(i, m_parameters.fftSize, m_parameters.sampleRate); (i, m_parameters.fftSize, m_parameters.sampleRate);
}
break; break;
} }
} }
@@ -101,7 +104,7 @@ public:
for (int i = n - 1; i > 0; --i) { for (int i = n - 1; i > 0; --i) {
int c = m_numeric[i]; int c = m_numeric[i];
if (!inPercussive) { if (!inPercussive) {
if (c == 2) { // residual/silent if (c == 2) { // residual
continue; continue;
} else if (c == 1) { // percussive } else if (c == 1) { // percussive
inPercussive = true; inPercussive = true;

View File

@@ -58,10 +58,10 @@ R3StretcherImpl::R3StretcherImpl(Parameters parameters,
BinSegmenter::Parameters segmenterParameters BinSegmenter::Parameters segmenterParameters
(m_guideConfiguration.classificationFftSize, (m_guideConfiguration.classificationFftSize,
classificationBins, m_parameters.sampleRate); classificationBins, m_parameters.sampleRate, 18);
BinClassifier::Parameters classifierParameters BinClassifier::Parameters classifierParameters
(classificationBins, 9, 1, 10, 2.0, 2.0, 1.0e-7); (classificationBins, 9, 1, 10, 2.0, 2.0);
int inRingBufferSize = m_guideConfiguration.longestFftSize * 2; int inRingBufferSize = m_guideConfiguration.longestFftSize * 2;
int outRingBufferSize = m_guideConfiguration.longestFftSize * 16; int outRingBufferSize = m_guideConfiguration.longestFftSize * 16;

View File

@@ -193,9 +193,9 @@ protected:
haveReadahead(false), haveReadahead(false),
classifier(new BinClassifier(classifierParameters)), classifier(new BinClassifier(classifierParameters)),
classification(classifierParameters.binCount, classification(classifierParameters.binCount,
BinClassifier::Classification::Silent), BinClassifier::Classification::Residual),
nextClassification(classifierParameters.binCount, nextClassification(classifierParameters.binCount,
BinClassifier::Classification::Silent), BinClassifier::Classification::Residual),
segmenter(new BinSegmenter(segmenterParameters)), segmenter(new BinSegmenter(segmenterParameters)),
segmentation(), prevSegmentation(), nextSegmentation(), segmentation(), prevSegmentation(), nextSegmentation(),
mixdown(longestFftSize, 0.f), // though it could be shorter mixdown(longestFftSize, 0.f), // though it could be shorter

View File

@@ -0,0 +1,186 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2022 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#define BOOST_TEST_DYN_LINK
#include <boost/test/unit_test.hpp>
// This test suite (shallowly) tests both BinClassifier and BinSegmenter
#include "../finer/BinClassifier.h"
#include "../finer/BinSegmenter.h"
using namespace RubberBand;
using namespace std;
namespace tt = boost::test_tools;
// Short aliases for the three classification values, so that the
// expected-result tables below line up as readable grids. We use the
// symbols H, X, and _ for harmonic, percussive, and residual
// respectively, because they are easier to distinguish than H, P, R.
//
// NOTE(review): these appear to be declared in the global namespace,
// where the C++ standard reserves identifiers beginning with an
// underscore (including a lone `_`) for the implementation. It works
// with the compilers this is built on, but renaming it would be the
// strictly portable choice -- confirm before relying on it elsewhere.
static constexpr auto H = BinClassifier::Classification::Harmonic;
static constexpr auto X = BinClassifier::Classification::Percussive;
static constexpr auto _ = BinClassifier::Classification::Residual;
// Convert a column of classifications into one single-character
// string per bin ("H" harmonic, "X" percussive, "_" residual), so
// that BOOST_TEST can compare and print them element by element.
// Any value that is not one of those three is rendered as "*".
vector<string> classes_to_strings(const vector<BinClassifier::Classification> &v)
{
    vector<string> sv;
    sv.reserve(v.size());
    // Iterate by value rather than by index: this avoids comparing a
    // signed loop counter against the unsigned v.size()
    for (const auto c : v) {
        const char *label = "*"; // fallback for unrecognised values
        switch (c) {
        case H: label = "H"; break;
        case X: label = "X"; break;
        case _: label = "_"; break;
        }
        sv.emplace_back(label);
    }
    return sv;
}
BOOST_AUTO_TEST_SUITE(TestBinClassifier)
BOOST_AUTO_TEST_CASE(classify_bins)
{
    // Input magnitudes: each row here is one column of six bins, fed
    // to the classifier in order from top to bottom
    vector<vector<double>> magColumns {
        { 0, 8, 1, 1, 0, 1 },
        { 0, 8, 0, 0, 0, 0 },
        { 8, 8, 8, 8, 8, 0 },
        { 0, 7, 0, 1, 0, 0 },
        { 0, 6, 0, 0, 0, 0 },
        { 0, 8, 0, 9, 9, 9 },
        { 0, 7, 0, 0, 1, 0 }
    };

    // One output column per input column, six bins each, all preset
    // to residual before the classifier overwrites them
    const vector<BinClassifier::Classification> allResidual(6, _);
    vector<vector<BinClassifier::Classification>>
        classifications(magColumns.size(), allResidual);

    // Arguments: bin count 6, horizontal filter length 3 with lag 1,
    // vertical filter length 3, harmonic and percussive thresholds 2.0
    BinClassifier::Parameters params(6, 3, /* lag */ 1, 3, 2.0, 2.0);
    BinClassifier classifier(params);

    // Classify the columns in sequence, writing each result into the
    // output column at the same position
    auto out = classifications.begin();
    for (auto &column : magColumns) {
        classifier.classify(column.data(), out->data());
        ++out;
    }

    /*
      Because the horizontal filter was given a lag of 1, every
      result comes out one column (here: one row) late. The vertical
      filter outputs are lined up with the middle of the 3-bin
      horizontal filter rather than with its end.

      The horizontal filter outputs (which, as laid out here, means
      filtering down the page) are therefore

      0 8 1 1 0 1 <- This is the "lag" column that is not meaningful
      0 8 0 0 0 0 <- This is the actual median for the first col (row)
      0 8 1 1 0 0
      0 8 0 1 0 0
      0 7 0 1 0 0
      0 7 0 1 0 0
      0 7 0 0 1 0

      and the vertical filter outputs, delayed by one column so as to
      match the horizontal ones, are

      0 0 0 0 0 0 <- The "lag" column (here row)
      0 1 1 1 1 0 <- The effective first column (row)
      0 0 0 0 0 0
      8 8 8 8 8 0
      0 0 1 0 0 0
      0 0 0 0 0 0
      0 0 8 9 9 9

      Bins are classified as harmonic, percussive, or residual.
      (Silent bins used to be detected as well, but done naively that
      does not line up with the lagged filter output, and silent bins
      did not seem relevant enough to justify the extra trouble.)
      Here, a bin whose horizontal and vertical filter outputs are
      roughly the same (0, 1, or one of 7/8/9) should come out as
      residual; otherwise it should be harmonic when the horizontal
      output is the greater of the two, and percussive when it is not.
    */

    vector<vector<BinClassifier::Classification>> expected {
        // These results are lagged by one relative to the input
        { _, H, H, H, _, H },
        { _, H, X, X, X, _ },
        { _, H, H, H, _, _ },
        { X, _, X, X, X, _ },
        { _, H, X, H, _, _ },
        { _, H, _, H, _, _ },
        { _, H, X, X, X, X }
    };

    // Compare column by column, as strings, so that a failure report
    // shows the H/X/_ symbols for each mismatching bin
    const int columnCount = static_cast<int>(expected.size());
    for (int i = 0; i < columnCount; ++i) {
        BOOST_TEST(classes_to_strings(classifications[i]) ==
                   classes_to_strings(expected[i]),
                   tt::per_element());
    }
}
BOOST_AUTO_TEST_CASE(segment_classification)
{
    // Pre-made classification columns (one per row, six bins each),
    // passed to the segmenter in order from top to bottom
    vector<vector<BinClassifier::Classification>> classification {
        { _, H, X, X, X, _ },
        { _, H, H, H, _, _ },
        { X, _, X, X, X, _ },
        { _, H, X, H, _, _ },
        { X, X, _, H, _, _ },
        { _, H, X, X, X, X },
        { _, H, _, _, _, _ }
    };

    // Arguments: FFT size 16, bin count 6, sample rate 48000, class
    // filter length 3. The expected frequencies below are all
    // multiples of 3000 (= 48000 / 16), presumably the bin spacing.
    BinSegmenter::Parameters params(16, 6, 48000, 3);
    BinSegmenter segmenter(params);

    // Segment each column in turn and keep every result
    vector<BinSegmenter::Segmentation> segmented;
    for (auto &column : classification) {
        segmented.push_back(segmenter.segment(column.data()));
    }

    /*
      A modal filter of length 3 was requested, and ties between
      equal counts are resolved in the order H, X, _. After
      filtering, the classifications therefore become:

      H H X X X X
      H H H H _ _
      X X X X X X
      H H H H _ _
      X X H _ _ _
      H H X X X X
      H _ _ _ _ _
    */

    vector<BinSegmenter::Segmentation> expected {
        { 0.0, 3000.0, 15000.0 },
        { 0.0, 9000.0, 9000.0 }, // Though any equal values would do!
        { 0.0, 0.0, 15000.0 },
        { 0.0, 9000.0, 9000.0 },
        { 6000.0, 6000.0, 6000.0 }, // Similarly
        { 0.0, 3000.0, 15000.0 },
        { 0.0, 24000.0, 24000.0 }
    };

    // Check all three boundary frequencies of every segmentation
    const int columnCount = static_cast<int>(expected.size());
    for (int i = 0; i < columnCount; ++i) {
        BOOST_TEST(segmented[i].percussiveBelow == expected[i].percussiveBelow);
        BOOST_TEST(segmented[i].percussiveAbove == expected[i].percussiveAbove);
        BOOST_TEST(segmented[i].residualAbove == expected[i].residualAbove);
    }
}
BOOST_AUTO_TEST_SUITE_END()