Reorganise into faster (R2) and finer (R3)

This commit is contained in:
Chris Cannam
2022-05-19 13:34:51 +01:00
parent e9264ae909
commit e9ad04e2b4
66 changed files with 89 additions and 127 deletions

151
src/finer/BinClassifier.h Normal file
View File

@@ -0,0 +1,151 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2022 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#ifndef RUBBERBAND_BIN_CLASSIFIER_H
#define RUBBERBAND_BIN_CLASSIFIER_H
#include "../common/Allocators.h"
#include "../common/MovingMedian.h"
#include "../common/RingBuffer.h"
#include <vector>
#include <memory>
namespace RubberBand {
class BinClassifier
{
public:
enum class Classification {
Harmonic = 0,
Percussive = 1,
Residual = 2,
Silent = 3
};
struct Parameters {
int binCount;
int horizontalFilterLength;
int horizontalFilterLag;
int verticalFilterLength;
double harmonicThreshold;
double percussiveThreshold;
float silenceThreshold;
Parameters(int _binCount, int _horizontalFilterLength,
int _horizontalFilterLag, int _verticalFilterLength,
double _harmonicThreshold, double _percussiveThreshold,
float _silenceThreshold) :
binCount(_binCount),
horizontalFilterLength(_horizontalFilterLength),
horizontalFilterLag(_horizontalFilterLag),
verticalFilterLength(_verticalFilterLength),
harmonicThreshold(_harmonicThreshold),
percussiveThreshold(_percussiveThreshold),
silenceThreshold(_silenceThreshold) { }
};
BinClassifier(Parameters parameters) :
m_parameters(parameters),
m_vfQueue(parameters.horizontalFilterLag)
{
int n = m_parameters.binCount;
for (int i = 0; i < n; ++i) {
m_hFilters.push_back(std::make_shared<MovingMedian<float>>
(m_parameters.horizontalFilterLength));
}
m_vFilter = std::make_unique<MovingMedian<float>>
(m_parameters.verticalFilterLength);
m_hf = allocate_and_zero<float>(n);
m_vf = allocate_and_zero<float>(n);
for (int i = 0; i < m_parameters.horizontalFilterLag; ++i) {
float *entry = allocate_and_zero<float>(n);
m_vfQueue.write(&entry, 1);
}
}
~BinClassifier()
{
while (m_vfQueue.getReadSpace() > 0) {
float *entry = m_vfQueue.readOne();
deallocate(entry);
}
deallocate(m_hf);
deallocate(m_vf);
}
void classify(const float *const mag, Classification *classification) {
const int n = m_parameters.binCount;
for (int i = 0; i < n; ++i) {
m_hFilters[i]->push(mag[i]);
m_hf[i] = m_hFilters[i]->get();
}
v_copy(m_vf, mag, n);
MovingMedian<float>::filter(*m_vFilter, m_vf);
if (m_parameters.horizontalFilterLag > 0) {
float *lagged = m_vfQueue.readOne();
m_vfQueue.write(&m_vf, 1);
m_vf = lagged;
}
double eps = 1.0e-7;
for (int i = 0; i < n; ++i) {
Classification c;
if (mag[i] < m_parameters.silenceThreshold) {
c = Classification::Silent;
} else if (double(m_hf[i]) / (double(m_vf[i]) + eps) >
m_parameters.harmonicThreshold) {
c = Classification::Harmonic;
} else if (double(m_vf[i]) / (double(m_hf[i]) + eps) >
m_parameters.percussiveThreshold) {
c = Classification::Percussive;
} else {
c = Classification::Residual;
}
classification[i] = c;
}
}
protected:
Parameters m_parameters;
std::vector<std::shared_ptr<MovingMedian<float>>> m_hFilters;
std::unique_ptr<MovingMedian<float>> m_vFilter;
float *m_hf;
float *m_vf;
RingBuffer<float *> m_vfQueue;
BinClassifier(const BinClassifier &) =delete;
BinClassifier &operator=(const BinClassifier &) =delete;
};
}
#endif

130
src/finer/BinSegmenter.h Normal file
View File

@@ -0,0 +1,130 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2022 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#ifndef RUBBERBAND_BIN_SEGMENTER_H
#define RUBBERBAND_BIN_SEGMENTER_H
#include "BinClassifier.h"
#include <vector>
namespace RubberBand {
class BinSegmenter
{
public:
struct Segmentation {
double percussiveBelow;
double percussiveAbove;
double residualAbove;
Segmentation(double _pb, double _pa, double _ra) :
percussiveBelow(_pb), percussiveAbove(_pa), residualAbove(_ra) { }
};
struct Parameters {
int fftSize;
double sampleRate;
Parameters(int _fftSize, double _sampleRate) :
fftSize(_fftSize), sampleRate(_sampleRate) { }
};
BinSegmenter(Parameters parameters,
BinClassifier::Parameters classifierParameters) :
m_parameters(parameters),
m_classifierParameters(classifierParameters),
m_classifier(classifierParameters),
m_classification(classifierParameters.binCount,
BinClassifier::Classification::Silent),
m_numeric(classifierParameters.binCount, 0),
m_classFilter(classifierParameters.binCount / 64)
{
}
Segmentation segment(const float *const mag) {
int n = m_classifierParameters.binCount;
m_classifier.classify(mag, m_classification.data());
for (int i = 0; i < n; ++i) {
switch (m_classification[i]) {
case BinClassifier::Classification::Harmonic:
m_numeric[i] = 0; break;
case BinClassifier::Classification::Percussive:
m_numeric[i] = 1; break;
default:
m_numeric[i] = 2; break;
}
}
MovingMedian<int>::filter(m_classFilter, m_numeric.data());
double f0 = 0.0;
for (int i = 1; i < n; ++i) {
if (m_numeric[i] != 1) {
f0 = frequencyForBin(i);
break;
}
}
double nyquist = m_parameters.sampleRate / 2.0;
int top = binForFrequency(16000.0);
if (top >= n) top = n-1;
double f1 = nyquist;
double f2 = nyquist;
bool inPercussive = false;
for (int i = top; i > 0; --i) {
if (m_numeric[i] == 1) { // percussive
if (!inPercussive) {
inPercussive = true;
f2 = frequencyForBin(i);
continue;
}
} else if (m_numeric[i] == 0) { // harmonic
if (inPercussive) {
f1 = frequencyForBin(i);
}
break; // always when harmonic reached
}
}
return Segmentation(f0, f1, f2);
}
protected:
Parameters m_parameters;
BinClassifier::Parameters m_classifierParameters;
BinClassifier m_classifier;
std::vector<BinClassifier::Classification> m_classification;
std::vector<int> m_numeric;
MovingMedian<int> m_classFilter;
int binForFrequency(double f) {
return int(round(f * double(m_parameters.fftSize) /
m_parameters.sampleRate));
}
double frequencyForBin(int b) {
return (double(b) * m_parameters.sampleRate)
/ double(m_parameters.fftSize);
}
BinSegmenter(const BinSegmenter &) =delete;
BinSegmenter &operator=(const BinSegmenter &) =delete;
};
}
#endif

127
src/finer/Peak.h Normal file
View File

@@ -0,0 +1,127 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2022 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#ifndef RUBBERBAND_PEAK_H
#define RUBBERBAND_PEAK_H
#include <vector>
namespace RubberBand
{
template <typename T>
class Peak
{
public:
Peak(int n) :
m_n(n),
m_locations(n, 0) { }
// Find the nearest peak to each bin, and optionally the next
// highest peak above each bin, within an array v, where a peak is
// a value greater than the p nearest neighbours on each side. The
// array must have length n where n is the size passed the the
// constructor.
void findNearestAndNextPeaks(const T *const v,
int p,
int *nearest,
int *next = nullptr)
{
findNearestAndNextPeaks(v, 0, m_n, p, nearest, next);
}
// As above but consider only the range of size rangeCount from
// index rangeStart. Write rangeCount results into nearest and
// optionally next, starting to write at index rangeStart - so
// these arrays must have the full length even if rangeCount is
// shorter. Leave the rest of nearest and/or next unmodified.
void findNearestAndNextPeaks(const T *const v,
int rangeStart,
int rangeCount,
int p,
int *nearest,
int *next = nullptr)
{
int nPeaks = 0;
int n = rangeStart + rangeCount;
for (int i = rangeStart; i < n; ++i) {
T x = v[i];
bool good = true;
for (int k = i - p; k <= i + p; ++k) {
if (k < rangeStart || k == i) continue;
if (k >= n) break;
if (k < i && x <= v[k]) {
good = false;
break;
}
if (k > i && x < v[k]) {
good = false;
break;
}
}
if (good) {
m_locations[nPeaks++] = i;
}
}
int pp = rangeStart - 1;
for (int i = rangeStart, j = 0; i < n; ++i) {
int np = i;
if (j < nPeaks) {
np = m_locations[j];
}
if (next) {
if (pp == i) {
next[i] = i;
} else {
next[i] = np;
}
}
if (nearest) {
if (j == 0) {
nearest[i] = np;
} else {
if (np - i < i - pp) {
nearest[i] = np;
} else {
nearest[i] = pp;
}
}
}
while (j < nPeaks && m_locations[j] <= i) {
pp = np;
++j;
}
}
}
protected:
int m_n;
std::vector<int> m_locations;
};
}
#endif

145
src/finer/R3StretcherImpl.h Normal file
View File

@@ -0,0 +1,145 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2022 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#ifndef RUBBERBAND_R3_STRETCHERIMPL_H
#define RUBBERBAND_R3_STRETCHERIMPL_H
#include <map>
#include <memory>
#include "BinSegmenter.h"
#include "Peak.h"
#include "../common/FFT.h"
#include "../common/Allocators.h"
namespace RubberBand
{
class R3StretcherImpl
{
public:
R3StretcherImpl(int sampleRate, int channels);
~R3StretcherImpl();
void reset();
void setTimeRatio(double ratio);
void setPitchScale(double scale);
double getTimeRatio() const;
double getPitchScale() const;
protected:
int m_sampleRate;
int m_channels;
double m_timeRatio;
double m_pitchScale;
struct FftBand {
int fftSize;
float f0;
float f1;
FftBand(int _s, float _f0, float _f1) :
fftSize(_s), f0(_f0), f1(_f1) { }
};
struct PhaseLockBand {
int p;
float beta;
float f0;
float f1;
PhaseLockBand(int _p, float _beta, float _f0, float _f1) :
p(_p), beta(_beta), f0(_f0), f1(_f1) { }
};
struct Range {
bool present;
float f0;
float f1;
Range() : present(false), f0(0.f), f1(0.f) { }
};
struct Guidance {
FftBand fftBands[3];
PhaseLockBand phaseLockBands[5];
Range kick;
Range lowPercussive;
Range phaseReset;
Range highPercussive;
Range channelLock;
};
struct ChannelScaleData {
int fftSize;
int bufSize; // size of every array here: fftSize/2 + 1
float *mag;
float *phase;
int *nearestPeaks;
int *nearestTroughs;
float *prevOutMag;
float *prevOutPhase;
int *prevNearestPeaks;
ChannelScaleData(int _fftSize) :
fftSize(_fftSize), bufSize(_fftSize/2 + 1),
mag(allocate_and_zero<float>(size_t(bufSize))),
phase(allocate_and_zero<float>(size_t(bufSize))),
nearestPeaks(allocate_and_zero<int>(size_t(bufSize))),
nearestTroughs(allocate_and_zero<int>(size_t(bufSize))),
prevOutMag(allocate_and_zero<float>(size_t(bufSize))),
prevOutPhase(allocate_and_zero<float>(size_t(bufSize))),
prevNearestPeaks(allocate_and_zero<int>(size_t(bufSize))) { }
~ChannelScaleData() {
deallocate(mag);
deallocate(phase);
deallocate(nearestPeaks);
deallocate(nearestTroughs);
deallocate(prevOutMag);
deallocate(prevOutPhase);
deallocate(prevNearestPeaks);
}
private:
ChannelScaleData(const ChannelScaleData &) =delete;
ChannelScaleData &operator=(const ChannelScaleData &) =delete;
};
struct ChannelData {
std::map<int, std::shared_ptr<ChannelScaleData>> scales;
std::unique_ptr<BinSegmenter> segmenter;
BinSegmenter::Segmentation segmentation;
BinSegmenter::Segmentation prevSegmentation;
BinSegmenter::Segmentation nextSegmentation;
Guidance guidance;
};
std::map<int, std::shared_ptr<FFT>> m_ffts;
};
}
#endif