In offline mode, create the resampler only if needed (i.e. if the pitch ratio is not 1.0 at the point when process is first called); and use a plain 2048-sample FFT with unity stretch

This commit is contained in:
Chris Cannam
2022-06-24 10:51:40 +01:00
parent 3b168ca55e
commit d65755427f
3 changed files with 84 additions and 42 deletions

View File

@@ -127,7 +127,8 @@ public:
// This is the classification and fallback FFT: we need it to // This is the classification and fallback FFT: we need it to
// go up to Nyquist so we can seamlessly switch to it for // go up to Nyquist so we can seamlessly switch to it for
// longer stretches // longer stretches, and down to 0.0 so we can use it for
// unity in offline mode
bandFftSize = roundUp(int(ceil(rate/32.0))); bandFftSize = roundUp(int(ceil(rate/32.0)));
m_configuration.fftBandLimits[1] = m_configuration.fftBandLimits[1] =
BandLimits(bandFftSize, rate, 0.0, rate / 2.0); BandLimits(bandFftSize, rate, 0.0, rate / 2.0);
@@ -154,6 +155,7 @@ public:
const BinSegmenter::Segmentation &nextSegmentation, const BinSegmenter::Segmentation &nextSegmentation,
double meanMagnitude, double meanMagnitude,
int unityCount, int unityCount,
bool realtime,
Guidance &guidance) const { Guidance &guidance) const {
bool hadPhaseReset = guidance.phaseReset.present; bool hadPhaseReset = guidance.phaseReset.present;
@@ -182,7 +184,8 @@ public:
hadPhaseReset, hadPhaseReset,
unityCount, unityCount,
magnitudes, magnitudes,
segmentation); segmentation,
realtime);
return; return;
} }
@@ -380,12 +383,28 @@ protected:
bool hadPhaseReset, bool hadPhaseReset,
uint32_t unityCount, uint32_t unityCount,
const double *const magnitudes, const double *const magnitudes,
const BinSegmenter::Segmentation &segmentation) const { const BinSegmenter::Segmentation &segmentation,
bool realtime) const {
// std::cout << "unity" << std::endl; // std::cout << "unity" << std::endl;
double nyquist = m_parameters.sampleRate / 2.0; double nyquist = m_parameters.sampleRate / 2.0;
if (!realtime) {
// ratio can't change, so we are just running 1.0 ratio
// throughout
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = 0.0;
guidance.fftBands[1].f0 = 0.0;
guidance.fftBands[1].f1 = nyquist;
guidance.fftBands[2].f0 = nyquist;
guidance.fftBands[2].f1 = nyquist;
guidance.phaseReset.present = true;
guidance.phaseReset.f0 = 0.0;
guidance.phaseReset.f1 = nyquist;
return;
}
guidance.fftBands[0].f0 = 0.0; guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = m_minLower; guidance.fftBands[0].f1 = m_minLower;
guidance.fftBands[1].f0 = m_minLower; guidance.fftBands[1].f0 = m_minLower;

View File

@@ -104,22 +104,13 @@ R3Stretcher::R3Stretcher(Parameters parameters,
1, false, // no fixed inputIncrement 1, false, // no fixed inputIncrement
m_log)); m_log));
Resampler::Parameters resamplerParameters;
resamplerParameters.quality = Resampler::FastestTolerable;
if (isRealTime()) { if (isRealTime()) {
resamplerParameters.dynamism = Resampler::RatioOftenChanging; createResampler();
resamplerParameters.ratioChange = Resampler::SmoothRatioChange; // In offline mode we don't create the resampler yet - we
} else { // don't want to have one at all if the pitch ratio is 1.0,
// ratio can't be changed in offline mode // but that could change before the first process call, so we
resamplerParameters.dynamism = Resampler::RatioMostlyFixed; // create the resampler if needed then
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
} }
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; //!!!???
m_resampler = std::unique_ptr<Resampler>
(new Resampler(resamplerParameters, m_parameters.channels));
calculateHop(); calculateHop();
@@ -132,23 +123,6 @@ R3Stretcher::R3Stretcher(Parameters parameters,
if (!m_timeRatio.is_lock_free()) { if (!m_timeRatio.is_lock_free()) {
m_log.log(0, "WARNING: std::atomic<double> is not lock-free"); m_log.log(0, "WARNING: std::atomic<double> is not lock-free");
} }
// Pad to half of the longest frame. As with R2, in real-time mode
// we don't do this -- it's better to start with a swoosh than
// introduce more latency, and we don't want gaps when the ratio
// changes.
if (!isRealTime()) {
int pad = m_guideConfiguration.longestFftSize / 2;
m_log.log(1, "offline mode: prefilling with", pad);
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->zero(pad);
}
// By the time we skip this later we will have resampled
m_startSkip = int(round(pad / m_pitchScale));
} else {
m_log.log(1, "realtime mode: no prefill");
}
} }
WindowType WindowType
@@ -249,6 +223,27 @@ R3Stretcher::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
m_keyFrameMap = mapping; m_keyFrameMap = mapping;
} }
void
R3Stretcher::createResampler()
{
Resampler::Parameters resamplerParameters;
resamplerParameters.quality = Resampler::FastestTolerable;
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize;
if (isRealTime()) {
resamplerParameters.dynamism = Resampler::RatioOftenChanging;
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
} else {
// ratio can't be changed in offline mode
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
}
m_resampler = std::unique_ptr<Resampler>
(new Resampler(resamplerParameters, m_parameters.channels));
}
void void
R3Stretcher::calculateHop() R3Stretcher::calculateHop()
{ {
@@ -398,7 +393,9 @@ void
R3Stretcher::reset() R3Stretcher::reset()
{ {
m_calculator->reset(); m_calculator->reset();
m_resampler->reset(); if (m_resampler) {
m_resampler->reset();
}
for (auto &it : m_scaleData) { for (auto &it : m_scaleData) {
it.second->guided.reset(); it.second->guided.reset();
@@ -463,12 +460,36 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final)
return; return;
} }
if (!isRealTime() && !m_keyFrameMap.empty()) { if (!isRealTime()) {
if (m_mode == ProcessMode::Studying) {
m_totalTargetDuration = if (m_mode == ProcessMode::JustCreated ||
size_t(round(m_studyInputDuration * getEffectiveRatio())); m_mode == ProcessMode::Studying) {
if (m_pitchScale != 1.0 && !m_resampler) {
createResampler();
}
// Pad to half of the longest frame. As with R2, in
// real-time mode we don't do this -- it's better to start
// with a swoosh than introduce more latency, and we don't
// want gaps when the ratio changes.
int pad = m_guideConfiguration.longestFftSize / 2;
m_log.log(1, "offline mode: prefilling with", pad);
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->zero(pad);
}
// By the time we skip this later we will have resampled
m_startSkip = int(round(pad / m_pitchScale));
}
if (!m_keyFrameMap.empty()) {
if (m_mode == ProcessMode::Studying) {
m_totalTargetDuration =
size_t(round(m_studyInputDuration * getEffectiveRatio()));
}
updateRatioFromMap();
} }
updateRatioFromMap();
} }
if (final) { if (final) {
@@ -891,7 +912,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
double ratio = getEffectiveRatio(); double ratio = getEffectiveRatio();
if (fabs(ratio - 1.0) < 1.0e-6) { if (fabs(ratio - 1.0) < 1.0e-7) {
++m_unityCount; ++m_unityCount;
} else { } else {
m_unityCount = 0; m_unityCount = 0;
@@ -907,6 +928,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
cd->nextSegmentation, cd->nextSegmentation,
v_mean(classifyScale->mag.data() + 1, classify/2), v_mean(classifyScale->mag.data() + 1, classify/2),
m_unityCount, m_unityCount,
isRealTime(),
cd->guidance); cd->guidance);
/* /*
if (c == 0) { if (c == 0) {
@@ -1070,7 +1092,7 @@ R3Stretcher::synthesiseChannel(int c, int outhop)
scaleData->fft.inverse(scale->real.data(), scaleData->fft.inverse(scale->real.data(),
scale->imag.data(), scale->imag.data(),
scale->timeDomain.data()); scale->timeDomain.data());
v_fftshift(scale->timeDomain.data(), fftSize); v_fftshift(scale->timeDomain.data(), fftSize);
// Synthesis window may be shorter than analysis window, so // Synthesis window may be shorter than analysis window, so

View File

@@ -310,6 +310,7 @@ protected:
ProcessMode m_mode; ProcessMode m_mode;
void consume(); void consume();
void createResampler();
void calculateHop(); void calculateHop();
void updateRatioFromMap(); void updateRatioFromMap();
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop); void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);