Handle non-ideal sample rates in R3; document sample rate parameter

This commit is contained in:
Chris Cannam
2022-09-26 16:02:13 +01:00
parent f586fe601a
commit 9e423cdd8c
9 changed files with 88 additions and 44 deletions

View File

@@ -464,6 +464,13 @@ public:
* Construct a time and pitch stretcher object to run at the given * Construct a time and pitch stretcher object to run at the given
* sample rate, with the given number of channels. * sample rate, with the given number of channels.
* *
* Both of the stretcher engines provide their best balance of
* quality with efficiency at sample rates of 44100 or 48000 Hz.
* Other rates may be used, and the stretcher should produce
* sensible output with any rate from 8000 to 192000 Hz, but you
* are advised to use 44100 or 48000 where practical. Do not use
* rates below 8000 or above 192000 Hz.
*
* Initial time and pitch scaling ratios and other processing * Initial time and pitch scaling ratios and other processing
* options may be provided. In particular, the behaviour of the * options may be provided. In particular, the behaviour of the
* stretcher depends strongly on whether offline or real-time mode * stretcher depends strongly on whether offline or real-time mode

View File

@@ -66,4 +66,19 @@ void pickNearestRational(double ratio, int max_denom, int &num, int &denom)
} }
} }
/**
 * Round value up to a power of two. A value that is already a power
 * of two is returned unchanged, and zero is returned as zero.
 *
 * If value is larger than the biggest power of two a size_t can
 * hold, there is nothing to round up to and zero is returned.
 */
size_t roundUp(size_t value)
{
    // Zero and exact powers of two have at most one bit set
    if (!(value & (value - 1))) return value;

    // Beyond this point the loop below would count as many bits as
    // size_t has, and shifting by the full width is undefined
    const size_t highestPower = (~size_t(0) >> 1) + 1;
    if (value > highestPower) return 0;

    size_t bits = 0;
    while (value) { ++bits; value >>= 1; }
    return size_t(1) << bits;
}

/**
 * Divide divisionOf by divisor, round the quotient up to a whole
 * number, and round that up to a power of two using roundUp.
 *
 * Returns zero if divisionOf is negative or NaN, if divisor is zero,
 * or if the quotient is too large to be rounded up within a size_t.
 */
size_t roundUpDiv(double divisionOf, size_t divisor)
{
    // Written as a negated >= so that NaN is rejected together with
    // negative values
    if (!(divisionOf >= 0.0)) return 0;
    if (divisor == 0) return 0;

    const double quotient = ceil(divisionOf / double(divisor));

    // Converting an out-of-range double to size_t is undefined, so
    // refuse anything above the largest representable power of two
    const size_t highestPower = (~size_t(0) >> 1) + 1;
    if (!(quotient <= double(highestPower))) return 0;

    return roundUp(size_t(quotient));
}
} }

View File

@@ -55,6 +55,10 @@ inline double frequencyForBin(int b, int fftSize, double sampleRate) {
void pickNearestRational(double ratio, int maxDenom, int &num, int &denom); void pickNearestRational(double ratio, int maxDenom, int &num, int &denom);
// Round value up to a power of two. A value that is already a power
// of two, or zero, is returned unchanged.
size_t roundUp(size_t value); // to next power of two
// Divide divisionOf by divisor, round the quotient up to a whole
// number, then round that up to a power of two with roundUp.
// Returns 0 if divisionOf is negative.
size_t roundUpDiv(double divisionOf, size_t divisor);
} }
#endif #endif

View File

@@ -33,6 +33,7 @@
#include "../common/Resampler.h" #include "../common/Resampler.h"
#include "../common/Profiler.h" #include "../common/Profiler.h"
#include "../common/sysutils.h" #include "../common/sysutils.h"
#include "../common/mathmisc.h"
#include <cassert> #include <cassert>
#include <cmath> #include <cmath>
@@ -365,16 +366,6 @@ R2Stretcher::getEffectiveRatio() const
return m_timeRatio * m_pitchScale; return m_timeRatio * m_pitchScale;
} }
// Round value up to a power of two; zero and exact powers of two
// come back unchanged.
size_t
R2Stretcher::roundUp(size_t value)
{
    // At most one bit set means zero or an exact power of two
    const bool atMostOneBit = ((value & (value - 1)) == 0);
    if (atMostOneBit) {
        return value;
    }

    // Count how many bits are needed to hold value; the answer is
    // the power of two one position above the highest set bit
    size_t width = 0;
    for (size_t rest = value; rest != 0; rest >>= 1) {
        ++width;
    }
    return size_t(1) << width;
}
void void
R2Stretcher::calculateSizes() R2Stretcher::calculateSizes()
{ {

View File

@@ -131,8 +131,6 @@ protected:
double getEffectiveRatio() const; double getEffectiveRatio() const;
size_t roundUp(size_t value); // to next power of two
template <typename T, typename S> template <typename T, typename S>
void cutShiftAndFold(T *target, int targetSize, void cutShiftAndFold(T *target, int targetSize,
S *src, // destructive to src S *src, // destructive to src

View File

@@ -122,8 +122,14 @@ public:
m_log.log(1, "Guide: rate and single-window mode", m_log.log(1, "Guide: rate and single-window mode",
rate, m_parameters.singleWindowMode); rate, m_parameters.singleWindowMode);
int classificationFftSize = int classificationFftSize = roundUpDiv(parameters.sampleRate, 32);
roundUp(int(ceil(parameters.sampleRate / 32.0)));
int minClassificationFftSize = 1024;
if (classificationFftSize < minClassificationFftSize) {
m_log.log(1, "Guide: sample rate is too low to work well");
m_log.log(1, "Guide: rounding up classification FFT size from and to", classificationFftSize, minClassificationFftSize);
classificationFftSize = minClassificationFftSize;
}
m_configuration.classificationFftSize = classificationFftSize; m_configuration.classificationFftSize = classificationFftSize;
@@ -247,6 +253,15 @@ public:
if (outhop > 256) { if (outhop > 256) {
guidance.phaseLockBands[2].p = 4; guidance.phaseLockBands[2].p = 4;
} }
for (int i = 0; i < 3; ++i) {
if (guidance.phaseLockBands[i].f0 > nyquist) {
guidance.phaseLockBands[i].f0 = nyquist;
}
if (guidance.phaseLockBands[i].f1 > nyquist) {
guidance.phaseLockBands[i].f1 = nyquist;
}
}
} else { } else {
@@ -461,16 +476,6 @@ protected:
double m_maxLower; double m_maxLower;
double m_maxHigher; double m_maxHigher;
// near-dupe with R2 RubberBandStretcher::Impl
// Round value up to a power of two, treating anything below 1 as 1.
int roundUp(int value) const {
    if (value < 1) {
        return 1;
    }
    // A single set bit means value is already a power of two
    if ((value & (value - 1)) == 0) {
        return value;
    }
    // Count the bits needed to hold value, then return the power of
    // two one position above its highest set bit
    size_t width = 0;
    for (; value != 0; value >>= 1) {
        ++width;
    }
    return int(size_t(1) << width);
}
void updateForSilence(Guidance &guidance) const { void updateForSilence(Guidance &guidance) const {
// std::cout << "phase reset on silence" << std::endl; // std::cout << "phase reset on silence" << std::endl;
double nyquist = m_parameters.sampleRate / 2.0; double nyquist = m_parameters.sampleRate / 2.0;
@@ -575,6 +580,7 @@ protected:
int b = binForFrequency(f, m_configuration.classificationFftSize, int b = binForFrequency(f, m_configuration.classificationFftSize,
m_parameters.sampleRate); m_parameters.sampleRate);
int n = m_configuration.classificationFftSize/2; int n = m_configuration.classificationFftSize/2;
if (b > n) b = n;
for (int i = 0; i < 3; ++i) { for (int i = 0; i < 3; ++i) {
if (b < n && magnitudes[b+1] < magnitudes[b]) { if (b < n && magnitudes[b+1] < magnitudes[b]) {
++b; ++b;

View File

@@ -141,7 +141,9 @@ public:
int endBin = binForFrequency int endBin = binForFrequency
(band.f1, m_parameters.fftSize, m_parameters.sampleRate); (band.f1, m_parameters.fftSize, m_parameters.sampleRate);
if (startBin > highest || endBin < lowest) continue; if (startBin > highest || endBin < lowest) continue;
if (endBin > highest) endBin = highest;
int count = endBin - startBin + 1; int count = endBin - startBin + 1;
if (count < 1) continue;
m_peakPicker.findNearestAndNextPeaks(mag[c], m_peakPicker.findNearestAndNextPeaks(mag[c],
startBin, count, startBin, count,
band.p, m_currentPeaks[c], band.p, m_currentPeaks[c],

View File

@@ -34,9 +34,9 @@ R3Stretcher::R3Stretcher(Parameters parameters,
double initialTimeRatio, double initialTimeRatio,
double initialPitchScale, double initialPitchScale,
Log log) : Log log) :
m_parameters(parameters),
m_limits(parameters.options),
m_log(log), m_log(log),
m_parameters(validateSampleRate(parameters)),
m_limits(parameters.options, m_parameters.sampleRate),
m_timeRatio(initialTimeRatio), m_timeRatio(initialTimeRatio),
m_pitchScale(initialPitchScale), m_pitchScale(initialPitchScale),
m_formantScale(0.0), m_formantScale(0.0),
@@ -145,10 +145,10 @@ R3Stretcher::R3Stretcher(Parameters parameters,
m_prevOuthop = int(round(m_inhop * getEffectiveRatio())); m_prevOuthop = int(round(m_inhop * getEffectiveRatio()));
if (!m_inhop.is_lock_free()) { if (!m_inhop.is_lock_free()) {
m_log.log(0, "WARNING: std::atomic<int> is not lock-free"); m_log.log(0, "R3Stretcher: WARNING: std::atomic<int> is not lock-free");
} }
if (!m_timeRatio.is_lock_free()) { if (!m_timeRatio.is_lock_free()) {
m_log.log(0, "WARNING: std::atomic<double> is not lock-free"); m_log.log(0, "R3Stretcher: WARNING: std::atomic<double> is not lock-free");
} }
} }
@@ -158,7 +158,7 @@ R3Stretcher::ScaleData::analysisWindowShape()
if (singleWindowMode) { if (singleWindowMode) {
return HannWindow; return HannWindow;
} else { } else {
if (fftSize > 2048) return HannWindow; if (fftSize < 1024 || fftSize > 2048) return HannWindow;
else return NiemitaloForwardWindow; else return NiemitaloForwardWindow;
} }
} }
@@ -175,7 +175,7 @@ R3Stretcher::ScaleData::synthesisWindowShape()
if (singleWindowMode) { if (singleWindowMode) {
return HannWindow; return HannWindow;
} else { } else {
if (fftSize > 2048) return HannWindow; if (fftSize < 1024 || fftSize > 2048) return HannWindow;
else return NiemitaloReverseWindow; else return NiemitaloReverseWindow;
} }
} }
@@ -303,7 +303,7 @@ R3Stretcher::createResampler()
areWeResampling(&before, &after); areWeResampling(&before, &after);
if (before) { if (before) {
if (after) { if (after) {
m_log.log(0, "WARNING: createResampler: we think we are resampling both before and after!"); m_log.log(0, "R3Stretcher: WARNING: we think we are resampling both before and after!");
} else { } else {
m_log.log(1, "createResampler: resampling before"); m_log.log(1, "createResampler: resampling before");
} }
@@ -356,12 +356,12 @@ R3Stretcher::calculateHop()
double inhop = proposedOuthop / ratio; double inhop = proposedOuthop / ratio;
if (inhop < m_limits.minInhop) { if (inhop < m_limits.minInhop) {
m_log.log(0, "WARNING: Ratio yields ideal inhop < minimum, results may be suspect", inhop, m_limits.minInhop); m_log.log(0, "R3Stretcher: WARNING: Ratio yields ideal inhop < minimum, results may be suspect", inhop, m_limits.minInhop);
inhop = m_limits.minInhop; inhop = m_limits.minInhop;
} }
if (inhop > m_limits.maxInhop) { if (inhop > m_limits.maxInhop) {
// Log level 1, this is not as big a deal as < minInhop above // Log level 1, this is not as big a deal as < minInhop above
m_log.log(1, "WARNING: Ratio yields ideal inhop > maximum, results may be suspect", inhop, m_limits.maxInhop); m_log.log(1, "R3Stretcher: WARNING: Ratio yields ideal inhop > maximum, results may be suspect", inhop, m_limits.maxInhop);
inhop = m_limits.maxInhop; inhop = m_limits.maxInhop;
} }
@@ -919,7 +919,7 @@ R3Stretcher::consume()
if (advanceCount > readSpace) { if (advanceCount > readSpace) {
// This should happen only when draining (Finished) // This should happen only when draining (Finished)
if (m_mode != ProcessMode::Finished) { if (m_mode != ProcessMode::Finished) {
m_log.log(0, "WARNING: readSpace < inhop when processing is not yet finished", readSpace, inhop); m_log.log(0, "R3Stretcher: WARNING: readSpace < inhop when processing is not yet finished", readSpace, inhop);
} }
advanceCount = readSpace; advanceCount = readSpace;
} }
@@ -1370,6 +1370,11 @@ R3Stretcher::synthesiseChannel(int c, int outhop, bool draining)
int highBin = binForFrequency(band.f1, fftSize, m_parameters.sampleRate); int highBin = binForFrequency(band.f1, fftSize, m_parameters.sampleRate);
if (highBin % 2 == 0 && highBin > 0) --highBin; if (highBin % 2 == 0 && highBin > 0) --highBin;
int n = scale->mag.size();
if (lowBin >= n) lowBin = n - 1;
if (highBin >= n) highBin = n - 1;
if (highBin < lowBin) highBin = lowBin;
if (lowBin > 0) { if (lowBin > 0) {
v_zero(scale->real.data(), lowBin); v_zero(scale->real.data(), lowBin);
v_zero(scale->imag.data(), lowBin); v_zero(scale->imag.data(), lowBin);

View File

@@ -109,18 +109,19 @@ protected:
int minInhop; int minInhop;
int maxInhopWithReadahead; int maxInhopWithReadahead;
int maxInhop; int maxInhop;
/**
 * Establish the hop limits for the given options, scaled to the
 * sample rate. Each rate-dependent limit is the rate divided by a
 * fixed divisor and rounded up to a power of two by roundUpDiv; the
 * divisors are chosen so that rates of 44100 and 48000 produce the
 * values noted in the trailing comments.
 */
Limits(RubberBandStretcher::Options options, double rate) :
    // commented values are results when rate = 44100 or 48000
    minPreferredOuthop(roundUpDiv(rate, 512)), // 128
    maxPreferredOuthop(roundUpDiv(rate, 128)), // 512
    minInhop(1),
    // rate / 32 rounds up to 2048 at 44100 and 48000, so the divisor
    // must be 64 for these two limits to come out at 1024
    maxInhopWithReadahead(roundUpDiv(rate, 64)), // 1024
    maxInhop(roundUpDiv(rate, 64)) { // 1024
    if (options & RubberBandStretcher::OptionWindowShort) {
        // See note in calculateHop
        minPreferredOuthop = roundUpDiv(rate, 256); // 256
        maxPreferredOuthop = (roundUpDiv(rate, 128) * 5) / 4; // 640
        maxInhopWithReadahead = roundUpDiv(rate, 128); // 512
        maxInhop = (roundUpDiv(rate, 64) * 3) / 2; // 1536
    }
}
}; };
@@ -311,9 +312,9 @@ protected:
int synthesisWindowLength(); int synthesisWindowLength();
}; };
Log m_log;
Parameters m_parameters; Parameters m_parameters;
const Limits m_limits; const Limits m_limits;
Log m_log;
std::atomic<double> m_timeRatio; std::atomic<double> m_timeRatio;
std::atomic<double> m_pitchScale; std::atomic<double> m_pitchScale;
@@ -365,6 +366,21 @@ protected:
int polarFromBin; int polarFromBin;
int polarBinCount; int polarBinCount;
}; };
/**
 * Return a copy of params whose sample rate lies within the
 * supported range of 8000 to 192000. A rate below the minimum, or
 * one that is not a number, is replaced with the minimum; a rate
 * above the maximum is replaced with the maximum. In either case a
 * warning is logged at level 0. All other fields are copied as-is.
 *
 * This is called while the constructor is initialising
 * m_parameters, so it relies on m_log having been declared (and so
 * initialised) ahead of m_parameters.
 */
Parameters validateSampleRate(const Parameters &params) {
    const double minRate = 8000.0, maxRate = 192000.0;
    Parameters validated { params };
    // Written as a negated >= so that a NaN rate, which compares
    // false against both limits, is caught here rather than passed on
    if (!(params.sampleRate >= minRate)) {
        m_log.log(0, "R3Stretcher: WARNING: Unsupported sample rate", params.sampleRate);
        m_log.log(0, "R3Stretcher: Minimum rate is", minRate);
        validated.sampleRate = minRate;
    } else if (params.sampleRate > maxRate) {
        m_log.log(0, "R3Stretcher: WARNING: Unsupported sample rate", params.sampleRate);
        m_log.log(0, "R3Stretcher: Maximum rate is", maxRate);
        validated.sampleRate = maxRate;
    }
    return validated;
}
void convertToPolar(process_t *mag, process_t *phase, void convertToPolar(process_t *mag, process_t *phase,
const process_t *real, const process_t *imag, const process_t *real, const process_t *imag,