Work toward properly expressing the window sizes in Guide when in single-window mode, so as to separate the time-domain frame length from the longest FFT size within R3Stretcher and allow us to use a shorter time-domain frame. Single-window mode is not working correctly yet.
This commit is contained in:
@@ -69,7 +69,9 @@ public:
|
|||||||
|
|
||||||
struct Guidance {
|
struct Guidance {
|
||||||
FftBand fftBands[3];
|
FftBand fftBands[3];
|
||||||
|
int fftBandCount;
|
||||||
PhaseLockBand phaseLockBands[4];
|
PhaseLockBand phaseLockBands[4];
|
||||||
|
int phaseLockBandCount;
|
||||||
Range kick;
|
Range kick;
|
||||||
Range preKick;
|
Range preKick;
|
||||||
Range highUnlocked;
|
Range highUnlocked;
|
||||||
@@ -96,11 +98,10 @@ public:
|
|||||||
int shortestFftSize;
|
int shortestFftSize;
|
||||||
int classificationFftSize;
|
int classificationFftSize;
|
||||||
BandLimits fftBandLimits[3];
|
BandLimits fftBandLimits[3];
|
||||||
Configuration(int _longestFftSize, int _shortestFftSize,
|
int fftBandLimitCount;
|
||||||
int _classificationFftSize) :
|
Configuration() :
|
||||||
longestFftSize(_longestFftSize),
|
longestFftSize(0), shortestFftSize(0), classificationFftSize(0),
|
||||||
shortestFftSize(_shortestFftSize),
|
fftBandLimitCount(0) { }
|
||||||
classificationFftSize(_classificationFftSize) { }
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Parameters {
|
struct Parameters {
|
||||||
@@ -113,46 +114,76 @@ public:
|
|||||||
|
|
||||||
Guide(Parameters parameters, Log log) :
|
Guide(Parameters parameters, Log log) :
|
||||||
m_parameters(parameters),
|
m_parameters(parameters),
|
||||||
m_log(log),
|
m_log(log)
|
||||||
m_configuration(roundUp(int(ceil(parameters.sampleRate / 16.0))),
|
|
||||||
roundUp(int(ceil(parameters.sampleRate / 64.0))),
|
|
||||||
roundUp(int(ceil(parameters.sampleRate / 32.0)))),
|
|
||||||
m_minLower(500.0), m_minHigher(4000.0),
|
|
||||||
m_defaultLower(700.0), m_defaultHigher(4800.0),
|
|
||||||
m_maxLower(1100.0), m_maxHigher(7000.0)
|
|
||||||
{
|
{
|
||||||
double rate = m_parameters.sampleRate;
|
double rate = m_parameters.sampleRate;
|
||||||
|
double nyquist = rate / 2.0;
|
||||||
|
|
||||||
m_log.log(1, "Guide: rate and single-window mode",
|
m_log.log(1, "Guide: rate and single-window mode",
|
||||||
rate, m_parameters.singleWindowMode);
|
rate, m_parameters.singleWindowMode);
|
||||||
|
|
||||||
if (m_parameters.singleWindowMode) {
|
int classificationFftSize =
|
||||||
m_defaultLower = 0.0;
|
roundUp(int(ceil(parameters.sampleRate / 32.0)));
|
||||||
m_defaultHigher = parameters.sampleRate / 2.0;
|
|
||||||
m_minLower = m_defaultLower;
|
|
||||||
m_maxLower = m_defaultLower;
|
|
||||||
m_minHigher = m_defaultHigher;
|
|
||||||
m_maxHigher = m_defaultHigher;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bandFftSize = roundUp(int(ceil(rate/16.0)));
|
m_configuration.classificationFftSize = classificationFftSize;
|
||||||
m_configuration.fftBandLimits[0] =
|
|
||||||
BandLimits(bandFftSize, rate, 0.0, m_maxLower);
|
|
||||||
|
|
||||||
// This is the classification and fallback FFT: we need it to
|
|
||||||
// go up to Nyquist so we can seamlessly switch to it for
|
|
||||||
// longer stretches, and down to 0.0 so we can use it for
|
|
||||||
// unity in offline mode
|
|
||||||
bandFftSize = roundUp(int(ceil(rate/32.0)));
|
|
||||||
m_configuration.fftBandLimits[1] =
|
|
||||||
BandLimits(bandFftSize, rate, 0.0, rate / 2.0);
|
|
||||||
|
|
||||||
bandFftSize = roundUp(int(ceil(rate/64.0)));
|
|
||||||
m_configuration.fftBandLimits[2] =
|
|
||||||
BandLimits(bandFftSize, rate, m_minHigher, rate/2.0);
|
|
||||||
|
|
||||||
m_log.log(1, "Guide: classification FFT size",
|
m_log.log(1, "Guide: classification FFT size",
|
||||||
m_configuration.classificationFftSize);
|
m_configuration.classificationFftSize);
|
||||||
|
|
||||||
|
if (m_parameters.singleWindowMode) {
|
||||||
|
|
||||||
|
// Single-window mode
|
||||||
|
|
||||||
|
m_configuration.longestFftSize = classificationFftSize;
|
||||||
|
m_configuration.shortestFftSize = classificationFftSize;
|
||||||
|
|
||||||
|
m_defaultLower = nyquist;
|
||||||
|
m_minLower = m_defaultLower;
|
||||||
|
m_maxLower = m_defaultLower;
|
||||||
|
|
||||||
|
m_defaultHigher = nyquist;
|
||||||
|
m_minHigher = m_defaultHigher;
|
||||||
|
m_maxHigher = m_defaultHigher;
|
||||||
|
|
||||||
|
m_configuration.fftBandLimitCount = 1;
|
||||||
|
|
||||||
|
m_configuration.fftBandLimits[0] =
|
||||||
|
BandLimits(classificationFftSize, rate, 0.0, nyquist);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// The normal multi-window mode
|
||||||
|
|
||||||
|
m_configuration.longestFftSize = classificationFftSize * 2;
|
||||||
|
m_configuration.shortestFftSize = classificationFftSize / 2;
|
||||||
|
|
||||||
|
m_defaultLower = 700.0;
|
||||||
|
m_minLower = 500.0;
|
||||||
|
m_maxLower = 1100.0;
|
||||||
|
|
||||||
|
m_defaultHigher = 4800.0;
|
||||||
|
m_minHigher = 4000.0;
|
||||||
|
m_maxHigher = 7000.0;
|
||||||
|
|
||||||
|
m_configuration.fftBandLimitCount = 3;
|
||||||
|
|
||||||
|
m_configuration.fftBandLimits[0] =
|
||||||
|
BandLimits(m_configuration.longestFftSize,
|
||||||
|
rate, 0.0, m_maxLower);
|
||||||
|
|
||||||
|
// This is the classification and fallback FFT: we need it
|
||||||
|
// to go up to Nyquist so we can seamlessly switch to it
|
||||||
|
// for longer stretches, and down to 0.0 so we can use it
|
||||||
|
// for unity in offline mode
|
||||||
|
|
||||||
|
m_configuration.fftBandLimits[1] =
|
||||||
|
BandLimits(classificationFftSize,
|
||||||
|
rate, 0.0, nyquist);
|
||||||
|
|
||||||
|
m_configuration.fftBandLimits[2] =
|
||||||
|
BandLimits(m_configuration.shortestFftSize,
|
||||||
|
rate, m_minHigher, nyquist);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const Configuration &getConfiguration() const {
|
const Configuration &getConfiguration() const {
|
||||||
@@ -184,13 +215,58 @@ public:
|
|||||||
guidance.channelLock.present = false;
|
guidance.channelLock.present = false;
|
||||||
|
|
||||||
double nyquist = m_parameters.sampleRate / 2.0;
|
double nyquist = m_parameters.sampleRate / 2.0;
|
||||||
guidance.fftBands[0].fftSize = roundUp(int(ceil(nyquist/8.0)));
|
|
||||||
guidance.fftBands[1].fftSize = roundUp(int(ceil(nyquist/16.0)));
|
|
||||||
guidance.fftBands[2].fftSize = roundUp(int(ceil(nyquist/32.0)));
|
|
||||||
|
|
||||||
// This is a vital stop case for PhaseAdvance
|
if (m_parameters.singleWindowMode) {
|
||||||
guidance.phaseLockBands[3].f1 = nyquist;
|
|
||||||
|
|
||||||
|
// All the fft and phase-lock bands are fixed in this
|
||||||
|
// mode. We'll still need to continue to set up phase
|
||||||
|
// reset ranges etc, including the unity case.
|
||||||
|
|
||||||
|
guidance.fftBandCount = 1;
|
||||||
|
guidance.fftBands[0].fftSize = m_configuration.classificationFftSize;
|
||||||
|
guidance.fftBands[0].f0 = 0.0;
|
||||||
|
guidance.fftBands[0].f1 = nyquist;
|
||||||
|
|
||||||
|
guidance.phaseLockBandCount = 3;
|
||||||
|
|
||||||
|
guidance.phaseLockBands[0].p = 1;
|
||||||
|
guidance.phaseLockBands[0].beta = betaFor(1200.0, ratio);
|
||||||
|
guidance.phaseLockBands[0].f0 = 0.0;
|
||||||
|
guidance.phaseLockBands[0].f1 = 1600.0;
|
||||||
|
|
||||||
|
guidance.phaseLockBands[1].p = 2;
|
||||||
|
guidance.phaseLockBands[1].beta = betaFor(4800.0, ratio);
|
||||||
|
guidance.phaseLockBands[1].f0 = 1600.0;
|
||||||
|
guidance.phaseLockBands[1].f1 = 7000.0;
|
||||||
|
|
||||||
|
guidance.phaseLockBands[2].p = 5;
|
||||||
|
guidance.phaseLockBands[2].beta = betaFor(10000.0, ratio);
|
||||||
|
guidance.phaseLockBands[2].f0 = 7000.0;
|
||||||
|
guidance.phaseLockBands[2].f1 = nyquist;
|
||||||
|
|
||||||
|
if (outhop > 256) {
|
||||||
|
guidance.phaseLockBands[2].p = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
// The normal multi-window mode
|
||||||
|
|
||||||
|
guidance.fftBandCount = 3;
|
||||||
|
guidance.fftBands[0].fftSize = m_configuration.longestFftSize;
|
||||||
|
guidance.fftBands[1].fftSize = m_configuration.classificationFftSize;
|
||||||
|
guidance.fftBands[2].fftSize = m_configuration.shortestFftSize;
|
||||||
|
|
||||||
|
guidance.phaseLockBandCount = 4;
|
||||||
|
|
||||||
|
// This is a vital stop case for PhaseAdvance
|
||||||
|
guidance.phaseLockBands[3].f1 = nyquist;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We've set the counts, and for single-window mode we've set
|
||||||
|
// the band ranges as well - in normal multi-window mode we
|
||||||
|
// still have to do that, but we should do these first
|
||||||
|
|
||||||
if (meanMagnitude < 1.0e-6) {
|
if (meanMagnitude < 1.0e-6) {
|
||||||
updateForSilence(guidance);
|
updateForSilence(guidance);
|
||||||
return;
|
return;
|
||||||
@@ -199,8 +275,6 @@ public:
|
|||||||
if (unityCount > 0) {
|
if (unityCount > 0) {
|
||||||
updateForUnity(guidance,
|
updateForUnity(guidance,
|
||||||
hadPhaseReset,
|
hadPhaseReset,
|
||||||
unityCount,
|
|
||||||
magnitudes,
|
|
||||||
segmentation,
|
segmentation,
|
||||||
realtime);
|
realtime);
|
||||||
return;
|
return;
|
||||||
@@ -265,66 +339,64 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double prevLower = guidance.fftBands[0].f1;
|
if (!m_parameters.singleWindowMode) {
|
||||||
double lower = descendToValley(prevLower, magnitudes);
|
|
||||||
if (lower > m_maxLower || lower < m_minLower) {
|
// The normal multi-window mode. For single-window we did
|
||||||
lower = m_defaultLower;
|
// this already.
|
||||||
|
|
||||||
|
double prevLower = guidance.fftBands[0].f1;
|
||||||
|
double lower = descendToValley(prevLower, magnitudes);
|
||||||
|
if (lower > m_maxLower || lower < m_minLower) {
|
||||||
|
lower = m_defaultLower;
|
||||||
|
}
|
||||||
|
|
||||||
|
double prevHigher = guidance.fftBands[1].f1;
|
||||||
|
double higher = descendToValley(prevHigher, magnitudes);
|
||||||
|
if (higher > m_maxHigher || higher < m_minHigher) {
|
||||||
|
higher = m_defaultHigher;
|
||||||
|
}
|
||||||
|
|
||||||
|
guidance.fftBands[0].f0 = 0.0;
|
||||||
|
guidance.fftBands[0].f1 = lower;
|
||||||
|
|
||||||
|
guidance.fftBands[1].f0 = lower;
|
||||||
|
guidance.fftBands[1].f1 = higher;
|
||||||
|
|
||||||
|
guidance.fftBands[2].f0 = higher;
|
||||||
|
guidance.fftBands[2].f1 = nyquist;
|
||||||
|
|
||||||
|
if (outhop > 256) {
|
||||||
|
guidance.fftBands[1].f1 = nyquist;
|
||||||
|
guidance.fftBands[2].f0 = nyquist;
|
||||||
|
}
|
||||||
|
|
||||||
|
double mid = std::max(lower, 1600.0);
|
||||||
|
|
||||||
|
guidance.phaseLockBands[0].p = 1;
|
||||||
|
guidance.phaseLockBands[0].beta = betaFor(300.0, ratio);
|
||||||
|
guidance.phaseLockBands[0].f0 = 0.0;
|
||||||
|
guidance.phaseLockBands[0].f1 = lower;
|
||||||
|
|
||||||
|
guidance.phaseLockBands[1].p = 2;
|
||||||
|
guidance.phaseLockBands[1].beta = betaFor(1600.0, ratio);
|
||||||
|
guidance.phaseLockBands[1].f0 = lower;
|
||||||
|
guidance.phaseLockBands[1].f1 = mid;
|
||||||
|
|
||||||
|
guidance.phaseLockBands[2].p = 3;
|
||||||
|
guidance.phaseLockBands[2].beta = betaFor(4800.0, ratio);
|
||||||
|
guidance.phaseLockBands[2].f0 = mid;
|
||||||
|
guidance.phaseLockBands[2].f1 = higher;
|
||||||
|
|
||||||
|
guidance.phaseLockBands[3].p = 4;
|
||||||
|
guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio);
|
||||||
|
guidance.phaseLockBands[3].f0 = higher;
|
||||||
|
guidance.phaseLockBands[3].f1 = nyquist;
|
||||||
|
|
||||||
|
if (outhop > 256) {
|
||||||
|
guidance.phaseLockBands[3].p = 3;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double prevHigher = guidance.fftBands[1].f1;
|
|
||||||
double higher = descendToValley(prevHigher, magnitudes);
|
|
||||||
if (higher > m_maxHigher || higher < m_minHigher) {
|
|
||||||
higher = m_defaultHigher;
|
|
||||||
}
|
|
||||||
|
|
||||||
guidance.fftBands[0].f0 = 0.0;
|
|
||||||
guidance.fftBands[0].f1 = lower;
|
|
||||||
|
|
||||||
// std::cout << "x:" << lower << std::endl;
|
|
||||||
|
|
||||||
guidance.fftBands[1].f0 = lower;
|
|
||||||
guidance.fftBands[1].f1 = higher;
|
|
||||||
|
|
||||||
guidance.fftBands[2].f0 = higher;
|
|
||||||
guidance.fftBands[2].f1 = nyquist;
|
|
||||||
|
|
||||||
if (outhop > 256) {
|
|
||||||
guidance.fftBands[1].f1 = nyquist;
|
|
||||||
guidance.fftBands[2].f0 = nyquist;
|
|
||||||
}
|
|
||||||
|
|
||||||
double mid = std::max(lower, 1600.0);
|
|
||||||
|
|
||||||
guidance.phaseLockBands[0].p = 1;
|
|
||||||
guidance.phaseLockBands[0].beta = betaFor(300.0, ratio);
|
|
||||||
guidance.phaseLockBands[0].f0 = 0.0;
|
|
||||||
guidance.phaseLockBands[0].f1 = lower;
|
|
||||||
|
|
||||||
guidance.phaseLockBands[1].p = 2;
|
|
||||||
guidance.phaseLockBands[1].beta = betaFor(1600.0, ratio);
|
|
||||||
guidance.phaseLockBands[1].f0 = lower;
|
|
||||||
guidance.phaseLockBands[1].f1 = mid;
|
|
||||||
|
|
||||||
guidance.phaseLockBands[2].p = 3;
|
|
||||||
guidance.phaseLockBands[2].beta = betaFor(5000.0, ratio);
|
|
||||||
guidance.phaseLockBands[2].f0 = mid;
|
|
||||||
guidance.phaseLockBands[2].f1 = higher;
|
|
||||||
|
|
||||||
guidance.phaseLockBands[3].p = 4;
|
|
||||||
guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio);
|
|
||||||
guidance.phaseLockBands[3].f0 = higher;
|
|
||||||
guidance.phaseLockBands[3].f1 = nyquist;
|
|
||||||
|
|
||||||
if (m_parameters.singleWindowMode) {
|
|
||||||
guidance.phaseLockBands[1].p = 1;
|
|
||||||
guidance.phaseLockBands[2].p = 2;
|
|
||||||
guidance.phaseLockBands[3].p = 5;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (outhop > 256) {
|
|
||||||
guidance.phaseLockBands[3].p = 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ratio > 2.0) {
|
if (ratio > 2.0) {
|
||||||
|
|
||||||
// For very long stretches, diffuse is better than
|
// For very long stretches, diffuse is better than
|
||||||
@@ -348,7 +420,7 @@ public:
|
|||||||
guidance.highUnlocked.present = true;
|
guidance.highUnlocked.present = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
std::ostringstream str;
|
std::ostringstream str;
|
||||||
str << "Guidance: FFT bands: ["
|
str << "Guidance: FFT bands: ["
|
||||||
<< guidance.fftBands[0].fftSize << " from "
|
<< guidance.fftBands[0].fftSize << " from "
|
||||||
@@ -363,8 +435,9 @@ public:
|
|||||||
<< guidance.phaseReset.present << " from "
|
<< guidance.phaseReset.present << " from "
|
||||||
<< guidance.phaseReset.f0 << " to " << guidance.phaseReset.f1
|
<< guidance.phaseReset.f0 << " to " << guidance.phaseReset.f1
|
||||||
<< "]" << std::endl;
|
<< "]" << std::endl;
|
||||||
m_parameters.logger(str.str());
|
|
||||||
*/
|
m_log.log(1, str.str().c_str());
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
void setDebugLevel(int level) {
|
void setDebugLevel(int level) {
|
||||||
@@ -396,12 +469,14 @@ protected:
|
|||||||
void updateForSilence(Guidance &guidance) const {
|
void updateForSilence(Guidance &guidance) const {
|
||||||
// std::cout << "phase reset on silence" << std::endl;
|
// std::cout << "phase reset on silence" << std::endl;
|
||||||
double nyquist = m_parameters.sampleRate / 2.0;
|
double nyquist = m_parameters.sampleRate / 2.0;
|
||||||
guidance.fftBands[0].f0 = 0.0;
|
if (!m_parameters.singleWindowMode) {
|
||||||
guidance.fftBands[0].f1 = 0.0;
|
guidance.fftBands[0].f0 = 0.0;
|
||||||
guidance.fftBands[1].f0 = 0.0;
|
guidance.fftBands[0].f1 = 0.0;
|
||||||
guidance.fftBands[1].f1 = nyquist;
|
guidance.fftBands[1].f0 = 0.0;
|
||||||
guidance.fftBands[2].f0 = nyquist;
|
guidance.fftBands[1].f1 = nyquist;
|
||||||
guidance.fftBands[2].f1 = nyquist;
|
guidance.fftBands[2].f0 = nyquist;
|
||||||
|
guidance.fftBands[2].f1 = nyquist;
|
||||||
|
}
|
||||||
guidance.phaseReset.present = true;
|
guidance.phaseReset.present = true;
|
||||||
guidance.phaseReset.f0 = 0.0;
|
guidance.phaseReset.f0 = 0.0;
|
||||||
guidance.phaseReset.f1 = nyquist;
|
guidance.phaseReset.f1 = nyquist;
|
||||||
@@ -409,8 +484,6 @@ protected:
|
|||||||
|
|
||||||
void updateForUnity(Guidance &guidance,
|
void updateForUnity(Guidance &guidance,
|
||||||
bool hadPhaseReset,
|
bool hadPhaseReset,
|
||||||
uint32_t /* unityCount */,
|
|
||||||
const process_t *const /* magnitudes */,
|
|
||||||
const BinSegmenter::Segmentation &segmentation,
|
const BinSegmenter::Segmentation &segmentation,
|
||||||
bool realtime) const {
|
bool realtime) const {
|
||||||
|
|
||||||
@@ -421,30 +494,26 @@ protected:
|
|||||||
if (!realtime) {
|
if (!realtime) {
|
||||||
// ratio can't change, so we are just running 1.0 ratio
|
// ratio can't change, so we are just running 1.0 ratio
|
||||||
// throughout
|
// throughout
|
||||||
guidance.fftBands[0].f0 = 0.0;
|
if (!m_parameters.singleWindowMode) {
|
||||||
guidance.fftBands[0].f1 = 0.0;
|
guidance.fftBands[0].f0 = 0.0;
|
||||||
guidance.fftBands[1].f0 = 0.0;
|
guidance.fftBands[0].f1 = 0.0;
|
||||||
guidance.fftBands[1].f1 = nyquist;
|
guidance.fftBands[1].f0 = 0.0;
|
||||||
guidance.fftBands[2].f0 = nyquist;
|
guidance.fftBands[1].f1 = nyquist;
|
||||||
guidance.fftBands[2].f1 = nyquist;
|
guidance.fftBands[2].f0 = nyquist;
|
||||||
|
guidance.fftBands[2].f1 = nyquist;
|
||||||
|
}
|
||||||
guidance.phaseReset.present = true;
|
guidance.phaseReset.present = true;
|
||||||
guidance.phaseReset.f0 = 0.0;
|
guidance.phaseReset.f0 = 0.0;
|
||||||
guidance.phaseReset.f1 = nyquist;
|
guidance.phaseReset.f1 = nyquist;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
guidance.fftBands[0].f0 = 0.0;
|
if (!m_parameters.singleWindowMode) {
|
||||||
guidance.fftBands[0].f1 = m_minLower;
|
guidance.fftBands[0].f0 = 0.0;
|
||||||
guidance.fftBands[1].f0 = m_minLower;
|
guidance.fftBands[0].f1 = m_minLower;
|
||||||
guidance.fftBands[1].f1 = m_minHigher;
|
guidance.fftBands[1].f0 = m_minLower;
|
||||||
guidance.fftBands[2].f0 = m_minHigher;
|
guidance.fftBands[1].f1 = m_minHigher;
|
||||||
guidance.fftBands[2].f1 = nyquist;
|
guidance.fftBands[2].f0 = m_minHigher;
|
||||||
|
|
||||||
if (m_parameters.singleWindowMode) {
|
|
||||||
guidance.fftBands[0].f1 = 0.0;
|
|
||||||
guidance.fftBands[1].f0 = 0.0;
|
|
||||||
guidance.fftBands[1].f1 = nyquist;
|
|
||||||
guidance.fftBands[2].f0 = nyquist;
|
|
||||||
guidance.fftBands[2].f1 = nyquist;
|
guidance.fftBands[2].f1 = nyquist;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -74,6 +74,14 @@ R3Stretcher::R3Stretcher(Parameters parameters,
|
|||||||
if (isSingleWindowed()) {
|
if (isSingleWindowed()) {
|
||||||
m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested");
|
m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_guideConfiguration.longestFftSize >
|
||||||
|
m_guideConfiguration.classificationFftSize) {
|
||||||
|
m_timeDomainFrameLength = m_guideConfiguration.longestFftSize;
|
||||||
|
} else {
|
||||||
|
m_timeDomainFrameLength =
|
||||||
|
(m_guideConfiguration.classificationFftSize * 3) / 2;
|
||||||
|
}
|
||||||
|
|
||||||
double maxClassifierFrequency = 16000.0;
|
double maxClassifierFrequency = 16000.0;
|
||||||
if (maxClassifierFrequency > m_parameters.sampleRate/2) {
|
if (maxClassifierFrequency > m_parameters.sampleRate/2) {
|
||||||
@@ -90,25 +98,27 @@ R3Stretcher::R3Stretcher(Parameters parameters,
|
|||||||
BinClassifier::Parameters classifierParameters
|
BinClassifier::Parameters classifierParameters
|
||||||
(classificationBins, 9, 1, 10, 2.0, 2.0);
|
(classificationBins, 9, 1, 10, 2.0, 2.0);
|
||||||
|
|
||||||
int inRingBufferSize = m_guideConfiguration.longestFftSize * 2;
|
int inRingBufferSize = m_timeDomainFrameLength * 2;
|
||||||
int outRingBufferSize = m_guideConfiguration.longestFftSize * 16;
|
int outRingBufferSize = m_timeDomainFrameLength * 16;
|
||||||
|
|
||||||
for (int c = 0; c < m_parameters.channels; ++c) {
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
||||||
m_channelData.push_back(std::make_shared<ChannelData>
|
m_channelData.push_back(std::make_shared<ChannelData>
|
||||||
(segmenterParameters,
|
(segmenterParameters,
|
||||||
classifierParameters,
|
classifierParameters,
|
||||||
m_guideConfiguration.longestFftSize,
|
m_timeDomainFrameLength,
|
||||||
inRingBufferSize,
|
inRingBufferSize,
|
||||||
outRingBufferSize));
|
outRingBufferSize));
|
||||||
for (auto band: m_guideConfiguration.fftBandLimits) {
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
||||||
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
||||||
int fftSize = band.fftSize;
|
int fftSize = band.fftSize;
|
||||||
m_channelData[c]->scales[fftSize] =
|
m_channelData[c]->scales[fftSize] =
|
||||||
std::make_shared<ChannelScaleData>
|
std::make_shared<ChannelScaleData>
|
||||||
(fftSize, m_guideConfiguration.longestFftSize);
|
(fftSize, m_timeDomainFrameLength);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto band: m_guideConfiguration.fftBandLimits) {
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
||||||
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
||||||
int fftSize = band.fftSize;
|
int fftSize = band.fftSize;
|
||||||
GuidedPhaseAdvance::Parameters guidedParameters
|
GuidedPhaseAdvance::Parameters guidedParameters
|
||||||
(fftSize, m_parameters.sampleRate, m_parameters.channels,
|
(fftSize, m_parameters.sampleRate, m_parameters.channels,
|
||||||
@@ -273,7 +283,7 @@ R3Stretcher::createResampler()
|
|||||||
}
|
}
|
||||||
|
|
||||||
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
|
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
|
||||||
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize;
|
resamplerParameters.maxBufferSize = m_timeDomainFrameLength;
|
||||||
|
|
||||||
if (isRealTime()) {
|
if (isRealTime()) {
|
||||||
// If we knew the caller would never change ratio, we could
|
// If we knew the caller would never change ratio, we could
|
||||||
@@ -447,7 +457,7 @@ R3Stretcher::getPreferredStartPad() const
|
|||||||
if (!isRealTime()) {
|
if (!isRealTime()) {
|
||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
return m_guideConfiguration.longestFftSize / 2;
|
return m_timeDomainFrameLength / 2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -458,7 +468,7 @@ R3Stretcher::getStartDelay() const
|
|||||||
return 0;
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
double factor = 0.5 / m_pitchScale;
|
double factor = 0.5 / m_pitchScale;
|
||||||
return size_t(ceil(m_guideConfiguration.longestFftSize * factor));
|
return size_t(ceil(m_timeDomainFrameLength * factor));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -531,10 +541,9 @@ size_t
|
|||||||
R3Stretcher::getSamplesRequired() const
|
R3Stretcher::getSamplesRequired() const
|
||||||
{
|
{
|
||||||
if (available() != 0) return 0;
|
if (available() != 0) return 0;
|
||||||
int longest = m_guideConfiguration.longestFftSize;
|
|
||||||
int rs = m_channelData[0]->inbuf->getReadSpace();
|
int rs = m_channelData[0]->inbuf->getReadSpace();
|
||||||
if (rs < longest) {
|
if (rs < m_timeDomainFrameLength) {
|
||||||
return longest - rs;
|
return m_timeDomainFrameLength - rs;
|
||||||
} else {
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -544,7 +553,7 @@ void
|
|||||||
R3Stretcher::setMaxProcessSize(size_t n)
|
R3Stretcher::setMaxProcessSize(size_t n)
|
||||||
{
|
{
|
||||||
size_t oldSize = m_channelData[0]->inbuf->getSize();
|
size_t oldSize = m_channelData[0]->inbuf->getSize();
|
||||||
size_t newSize = m_guideConfiguration.longestFftSize + n;
|
size_t newSize = m_timeDomainFrameLength + n;
|
||||||
|
|
||||||
if (newSize > oldSize) {
|
if (newSize > oldSize) {
|
||||||
m_log.log(1, "setMaxProcessSize: resizing from and to", oldSize, newSize);
|
m_log.log(1, "setMaxProcessSize: resizing from and to", oldSize, newSize);
|
||||||
@@ -599,11 +608,11 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final)
|
|||||||
createResampler();
|
createResampler();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pad to half the longest frame. As with R2, in real-time
|
// Pad to half the frame. As with R2, in real-time mode we
|
||||||
// mode we don't do this -- it's better to start with a
|
// don't do this -- it's better to start with a swoosh
|
||||||
// swoosh than introduce more latency, and we don't want
|
// than introduce more latency, and we don't want gaps
|
||||||
// gaps when the ratio changes.
|
// when the ratio changes.
|
||||||
int pad = m_guideConfiguration.longestFftSize / 2;
|
int pad = m_timeDomainFrameLength / 2;
|
||||||
m_log.log(1, "offline mode: prefilling with", pad);
|
m_log.log(1, "offline mode: prefilling with", pad);
|
||||||
for (int c = 0; c < m_parameters.channels; ++c) {
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
||||||
m_channelData[c]->inbuf->zero(pad);
|
m_channelData[c]->inbuf->zero(pad);
|
||||||
@@ -735,7 +744,7 @@ R3Stretcher::consume()
|
|||||||
// the map iterators
|
// the map iterators
|
||||||
|
|
||||||
int readSpace = cd0->inbuf->getReadSpace();
|
int readSpace = cd0->inbuf->getReadSpace();
|
||||||
if (readSpace < longest) {
|
if (readSpace < m_timeDomainFrameLength) {
|
||||||
if (m_mode == ProcessMode::Finished) {
|
if (m_mode == ProcessMode::Finished) {
|
||||||
if (readSpace == 0) {
|
if (readSpace == 0) {
|
||||||
int fill = cd0->scales.at(longest)->accumulatorFill;
|
int fill = cd0->scales.at(longest)->accumulatorFill;
|
||||||
@@ -761,10 +770,6 @@ R3Stretcher::consume()
|
|||||||
|
|
||||||
for (auto &it : m_channelData[0]->scales) {
|
for (auto &it : m_channelData[0]->scales) {
|
||||||
int fftSize = it.first;
|
int fftSize = it.first;
|
||||||
if (isSingleWindowed() &&
|
|
||||||
fftSize != m_guideConfiguration.classificationFftSize) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
for (int c = 0; c < channels; ++c) {
|
for (int c = 0; c < channels; ++c) {
|
||||||
auto &cd = m_channelData.at(c);
|
auto &cd = m_channelData.at(c);
|
||||||
auto &scale = cd->scales.at(fftSize);
|
auto &scale = cd->scales.at(fftSize);
|
||||||
@@ -887,6 +892,8 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
auto &cd = m_channelData.at(c);
|
auto &cd = m_channelData.at(c);
|
||||||
process_t *buf = cd->scales.at(longest)->timeDomain.data();
|
process_t *buf = cd->scales.at(longest)->timeDomain.data();
|
||||||
|
|
||||||
|
//!!! review
|
||||||
|
|
||||||
int readSpace = cd->inbuf->getReadSpace();
|
int readSpace = cd->inbuf->getReadSpace();
|
||||||
if (readSpace < longest) {
|
if (readSpace < longest) {
|
||||||
cd->inbuf->peek(buf, readSpace);
|
cd->inbuf->peek(buf, readSpace);
|
||||||
@@ -900,16 +907,15 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
// it, windowing as we copy. The classification scale is handled
|
// it, windowing as we copy. The classification scale is handled
|
||||||
// separately because it has readahead, so skip it here as well as
|
// separately because it has readahead, so skip it here as well as
|
||||||
// the longest. (In practice this means we are probably only
|
// the longest. (In practice this means we are probably only
|
||||||
// populating one scale)
|
// populating one scale in multi-window mode, and none at all in
|
||||||
|
// single-window mode)
|
||||||
|
|
||||||
if (!isSingleWindowed()) {
|
for (auto &it: cd->scales) {
|
||||||
for (auto &it: cd->scales) {
|
int fftSize = it.first;
|
||||||
int fftSize = it.first;
|
if (fftSize == classify || fftSize == longest) continue;
|
||||||
if (fftSize == classify || fftSize == longest) continue;
|
int offset = (longest - fftSize) / 2;
|
||||||
int offset = (longest - fftSize) / 2;
|
m_scaleData.at(fftSize)->analysisWindow.cut
|
||||||
m_scaleData.at(fftSize)->analysisWindow.cut
|
(buf + offset, it.second->timeDomain.data());
|
||||||
(buf + offset, it.second->timeDomain.data());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The classification scale has a one-hop readahead, so populate
|
// The classification scale has a one-hop readahead, so populate
|
||||||
@@ -937,7 +943,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Finally window the longest scale
|
// Finally window the longest scale
|
||||||
if (!isSingleWindowed()) {
|
if (classify != longest) {
|
||||||
m_scaleData.at(longest)->analysisWindow.cut(buf);
|
m_scaleData.at(longest)->analysisWindow.cut(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -964,14 +970,14 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
classifyScale->real.data(),
|
classifyScale->real.data(),
|
||||||
classifyScale->imag.data());
|
classifyScale->imag.data());
|
||||||
|
|
||||||
for (const auto &b : m_guideConfiguration.fftBandLimits) {
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
||||||
if (b.fftSize == classify) {
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
||||||
|
if (band.fftSize == classify) {
|
||||||
ToPolarSpec spec;
|
ToPolarSpec spec;
|
||||||
spec.magFromBin = 0;
|
spec.magFromBin = 0;
|
||||||
spec.magBinCount = classify/2 + 1;
|
spec.magBinCount = classify/2 + 1;
|
||||||
spec.polarFromBin = b.b0min;
|
spec.polarFromBin = band.b0min;
|
||||||
spec.polarBinCount = b.b1max - b.b0min + 1;
|
spec.polarBinCount = band.b1max - band.b0min + 1;
|
||||||
convertToPolar(readahead.mag.data(),
|
convertToPolar(readahead.mag.data(),
|
||||||
readahead.phase.data(),
|
readahead.phase.data(),
|
||||||
classifyScale->real.data(),
|
classifyScale->real.data(),
|
||||||
@@ -997,9 +1003,6 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
if (fftSize == classify && haveValidReadahead) {
|
if (fftSize == classify && haveValidReadahead) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (isSingleWindowed() && fftSize != classify) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto &scale = it.second;
|
auto &scale = it.second;
|
||||||
|
|
||||||
@@ -1009,8 +1012,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
scale->real.data(),
|
scale->real.data(),
|
||||||
scale->imag.data());
|
scale->imag.data());
|
||||||
|
|
||||||
for (const auto &b : m_guideConfiguration.fftBandLimits) {
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
||||||
if (b.fftSize == fftSize) {
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
||||||
|
if (band.fftSize == fftSize) {
|
||||||
|
|
||||||
ToPolarSpec spec;
|
ToPolarSpec spec;
|
||||||
|
|
||||||
@@ -1024,11 +1028,11 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
if (fftSize == classify) {
|
if (fftSize == classify) {
|
||||||
spec.magFromBin = 0;
|
spec.magFromBin = 0;
|
||||||
spec.magBinCount = classify/2 + 1;
|
spec.magBinCount = classify/2 + 1;
|
||||||
spec.polarFromBin = b.b0min;
|
spec.polarFromBin = band.b0min;
|
||||||
spec.polarBinCount = b.b1max - b.b0min + 1;
|
spec.polarBinCount = band.b1max - band.b0min + 1;
|
||||||
} else {
|
} else {
|
||||||
spec.magFromBin = b.b0min;
|
spec.magFromBin = band.b0min;
|
||||||
spec.magBinCount = b.b1max - b.b0min + 1;
|
spec.magBinCount = band.b1max - band.b0min + 1;
|
||||||
spec.polarFromBin = spec.magFromBin;
|
spec.polarFromBin = spec.magFromBin;
|
||||||
spec.polarBinCount = spec.magBinCount;
|
spec.polarBinCount = spec.magBinCount;
|
||||||
}
|
}
|
||||||
@@ -1171,11 +1175,6 @@ R3Stretcher::adjustFormant(int c)
|
|||||||
for (auto &it : cd->scales) {
|
for (auto &it : cd->scales) {
|
||||||
|
|
||||||
int fftSize = it.first;
|
int fftSize = it.first;
|
||||||
if (isSingleWindowed() &&
|
|
||||||
fftSize != m_guideConfiguration.classificationFftSize) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto &scale = it.second;
|
auto &scale = it.second;
|
||||||
|
|
||||||
int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate));
|
int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate));
|
||||||
@@ -1186,9 +1185,10 @@ R3Stretcher::adjustFormant(int c)
|
|||||||
process_t maxRatio = 60.0;
|
process_t maxRatio = 60.0;
|
||||||
process_t minRatio = 1.0 / maxRatio;
|
process_t minRatio = 1.0 / maxRatio;
|
||||||
|
|
||||||
for (const auto &b : m_guideConfiguration.fftBandLimits) {
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
||||||
if (b.fftSize != fftSize) continue;
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
||||||
for (int i = b.b0min; i < b.b1max && i < highBin; ++i) {
|
if (band.fftSize != fftSize) continue;
|
||||||
|
for (int i = band.b0min; i < band.b1max && i < highBin; ++i) {
|
||||||
process_t source = cd->formant->envelopeAt(i * sourceFactor);
|
process_t source = cd->formant->envelopeAt(i * sourceFactor);
|
||||||
process_t target = cd->formant->envelopeAt(i * targetFactor);
|
process_t target = cd->formant->envelopeAt(i * targetFactor);
|
||||||
if (target > 0.0) {
|
if (target > 0.0) {
|
||||||
@@ -1205,6 +1205,8 @@ R3Stretcher::adjustFormant(int c)
|
|||||||
void
|
void
|
||||||
R3Stretcher::adjustPreKick(int c)
|
R3Stretcher::adjustPreKick(int c)
|
||||||
{
|
{
|
||||||
|
//!!! if we aren't going to do this, we should modify Guide so as
|
||||||
|
//!!! not to do the small additional work of checking for it
|
||||||
if (isSingleWindowed()) return;
|
if (isSingleWindowed()) return;
|
||||||
|
|
||||||
Profiler profiler("R3Stretcher::adjustPreKick");
|
Profiler profiler("R3Stretcher::adjustPreKick");
|
||||||
@@ -1245,14 +1247,11 @@ R3Stretcher::synthesiseChannel(int c, int outhop, bool draining)
|
|||||||
int longest = m_guideConfiguration.longestFftSize;
|
int longest = m_guideConfiguration.longestFftSize;
|
||||||
|
|
||||||
auto &cd = m_channelData.at(c);
|
auto &cd = m_channelData.at(c);
|
||||||
|
|
||||||
for (const auto &band : cd->guidance.fftBands) {
|
|
||||||
int fftSize = band.fftSize;
|
|
||||||
|
|
||||||
if (isSingleWindowed() &&
|
for (int b = 0; b < cd->guidance.fftBandCount; ++b) {
|
||||||
fftSize != m_guideConfiguration.classificationFftSize) {
|
|
||||||
continue;
|
const auto &band = cd->guidance.fftBands[b];
|
||||||
}
|
int fftSize = band.fftSize;
|
||||||
|
|
||||||
auto &scale = cd->scales.at(fftSize);
|
auto &scale = cd->scales.at(fftSize);
|
||||||
auto &scaleData = m_scaleData.at(fftSize);
|
auto &scaleData = m_scaleData.at(fftSize);
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ protected:
|
|||||||
FixedVector<process_t> accumulator;
|
FixedVector<process_t> accumulator;
|
||||||
int accumulatorFill;
|
int accumulatorFill;
|
||||||
|
|
||||||
ChannelScaleData(int _fftSize, int _longestFftSize) :
|
ChannelScaleData(int _fftSize, int _timeDomainLength) :
|
||||||
fftSize(_fftSize),
|
fftSize(_fftSize),
|
||||||
bufSize(fftSize/2 + 1),
|
bufSize(fftSize/2 + 1),
|
||||||
timeDomain(fftSize, 0.f),
|
timeDomain(fftSize, 0.f),
|
||||||
@@ -143,7 +143,7 @@ protected:
|
|||||||
advancedPhase(bufSize, 0.f),
|
advancedPhase(bufSize, 0.f),
|
||||||
prevMag(bufSize, 0.f),
|
prevMag(bufSize, 0.f),
|
||||||
pendingKick(bufSize, 0.f),
|
pendingKick(bufSize, 0.f),
|
||||||
accumulator(_longestFftSize, 0.f),
|
accumulator(_timeDomainLength, 0.f),
|
||||||
accumulatorFill(0)
|
accumulatorFill(0)
|
||||||
{ }
|
{ }
|
||||||
|
|
||||||
@@ -202,7 +202,7 @@ protected:
|
|||||||
std::unique_ptr<FormantData> formant;
|
std::unique_ptr<FormantData> formant;
|
||||||
ChannelData(BinSegmenter::Parameters segmenterParameters,
|
ChannelData(BinSegmenter::Parameters segmenterParameters,
|
||||||
BinClassifier::Parameters classifierParameters,
|
BinClassifier::Parameters classifierParameters,
|
||||||
int longestFftSize,
|
int timeDomainLength,
|
||||||
int inRingBufferSize,
|
int inRingBufferSize,
|
||||||
int outRingBufferSize) :
|
int outRingBufferSize) :
|
||||||
scales(),
|
scales(),
|
||||||
@@ -215,7 +215,7 @@ protected:
|
|||||||
BinClassifier::Classification::Residual),
|
BinClassifier::Classification::Residual),
|
||||||
segmenter(new BinSegmenter(segmenterParameters)),
|
segmenter(new BinSegmenter(segmenterParameters)),
|
||||||
segmentation(), prevSegmentation(), nextSegmentation(),
|
segmentation(), prevSegmentation(), nextSegmentation(),
|
||||||
mixdown(longestFftSize, 0.f), // though it could be shorter
|
mixdown(timeDomainLength, 0.f),
|
||||||
resampled(outRingBufferSize, 0.f),
|
resampled(outRingBufferSize, 0.f),
|
||||||
inbuf(new RingBuffer<float>(inRingBufferSize)),
|
inbuf(new RingBuffer<float>(inRingBufferSize)),
|
||||||
outbuf(new RingBuffer<float>(outRingBufferSize)),
|
outbuf(new RingBuffer<float>(outRingBufferSize)),
|
||||||
@@ -297,6 +297,7 @@ protected:
|
|||||||
std::map<int, std::shared_ptr<ScaleData>> m_scaleData;
|
std::map<int, std::shared_ptr<ScaleData>> m_scaleData;
|
||||||
Guide m_guide;
|
Guide m_guide;
|
||||||
Guide::Configuration m_guideConfiguration;
|
Guide::Configuration m_guideConfiguration;
|
||||||
|
int m_timeDomainFrameLength;
|
||||||
ChannelAssembly m_channelAssembly;
|
ChannelAssembly m_channelAssembly;
|
||||||
std::unique_ptr<StretchCalculator> m_calculator;
|
std::unique_ptr<StretchCalculator> m_calculator;
|
||||||
std::unique_ptr<Resampler> m_resampler;
|
std::unique_ptr<Resampler> m_resampler;
|
||||||
|
|||||||
Reference in New Issue
Block a user