From d65755427f73aeeb4fa3693d0a2959ed181930a1 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 24 Jun 2022 10:51:40 +0100 Subject: [PATCH] In offline mode, create the resampler only if needed (i.e. not if the pitch ratio is still 1.0 at the point when process is first called); and use plain 2048-sample fft with unity stretch --- src/finer/Guide.h | 25 ++++++++-- src/finer/R3Stretcher.cpp | 100 +++++++++++++++++++++++--------------- src/finer/R3Stretcher.h | 1 + 3 files changed, 84 insertions(+), 42 deletions(-) diff --git a/src/finer/Guide.h b/src/finer/Guide.h index 54d2599..bfb48a5 100644 --- a/src/finer/Guide.h +++ b/src/finer/Guide.h @@ -127,7 +127,8 @@ public: // This is the classification and fallback FFT: we need it to // go up to Nyquist so we can seamlessly switch to it for - // longer stretches + // longer stretches, and down to 0.0 so we can use it for + // unity in offline mode bandFftSize = roundUp(int(ceil(rate/32.0))); m_configuration.fftBandLimits[1] = BandLimits(bandFftSize, rate, 0.0, rate / 2.0); @@ -154,6 +155,7 @@ public: const BinSegmenter::Segmentation &nextSegmentation, double meanMagnitude, int unityCount, + bool realtime, Guidance &guidance) const { bool hadPhaseReset = guidance.phaseReset.present; @@ -182,7 +184,8 @@ public: hadPhaseReset, unityCount, magnitudes, - segmentation); + segmentation, + realtime); return; } @@ -380,12 +383,28 @@ protected: bool hadPhaseReset, uint32_t unityCount, const double *const magnitudes, - const BinSegmenter::Segmentation &segmentation) const { + const BinSegmenter::Segmentation &segmentation, + bool realtime) const { // std::cout << "unity" << std::endl; double nyquist = m_parameters.sampleRate / 2.0; + if (!realtime) { + // ratio can't change, so we are just running 1.0 ratio + // throughout + guidance.fftBands[0].f0 = 0.0; + guidance.fftBands[0].f1 = 0.0; + guidance.fftBands[1].f0 = 0.0; + guidance.fftBands[1].f1 = nyquist; + guidance.fftBands[2].f0 = nyquist; + guidance.fftBands[2].f1 = nyquist; + 
guidance.phaseReset.present = true; + guidance.phaseReset.f0 = 0.0; + guidance.phaseReset.f1 = nyquist; + return; + } + guidance.fftBands[0].f0 = 0.0; guidance.fftBands[0].f1 = m_minLower; guidance.fftBands[1].f0 = m_minLower; diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 1fe3bb7..6441c4c 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -104,22 +104,13 @@ R3Stretcher::R3Stretcher(Parameters parameters, 1, false, // no fixed inputIncrement m_log)); - Resampler::Parameters resamplerParameters; - resamplerParameters.quality = Resampler::FastestTolerable; - if (isRealTime()) { - resamplerParameters.dynamism = Resampler::RatioOftenChanging; - resamplerParameters.ratioChange = Resampler::SmoothRatioChange; - } else { - // ratio can't be changed in offline mode - resamplerParameters.dynamism = Resampler::RatioMostlyFixed; - resamplerParameters.ratioChange = Resampler::SuddenRatioChange; + createResampler(); + // In offline mode we don't create the resampler yet - we + // don't want to have one at all if the pitch ratio is 1.0, + // but that could change before the first process call, so we + // create the resampler if needed then } - - resamplerParameters.initialSampleRate = m_parameters.sampleRate; - resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; //!!!??? - m_resampler = std::unique_ptr - (new Resampler(resamplerParameters, m_parameters.channels)); calculateHop(); @@ -132,23 +123,6 @@ R3Stretcher::R3Stretcher(Parameters parameters, if (!m_timeRatio.is_lock_free()) { m_log.log(0, "WARNING: std::atomic is not lock-free"); } - - // Pad to half of the longest frame. As with R2, in real-time mode - // we don't do this -- it's better to start with a swoosh than - // introduce more latency, and we don't want gaps when the ratio - // changes. 
- - if (!isRealTime()) { - int pad = m_guideConfiguration.longestFftSize / 2; - m_log.log(1, "offline mode: prefilling with", pad); - for (int c = 0; c < m_parameters.channels; ++c) { - m_channelData[c]->inbuf->zero(pad); - } - // By the time we skip this later we will have resampled - m_startSkip = int(round(pad / m_pitchScale)); - } else { - m_log.log(1, "realtime mode: no prefill"); - } } WindowType @@ -249,6 +223,27 @@ R3Stretcher::setKeyFrameMap(const std::map &mapping) m_keyFrameMap = mapping; } +void +R3Stretcher::createResampler() +{ + Resampler::Parameters resamplerParameters; + resamplerParameters.quality = Resampler::FastestTolerable; + resamplerParameters.initialSampleRate = m_parameters.sampleRate; + resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; + + if (isRealTime()) { + resamplerParameters.dynamism = Resampler::RatioOftenChanging; + resamplerParameters.ratioChange = Resampler::SmoothRatioChange; + } else { + // ratio can't be changed in offline mode + resamplerParameters.dynamism = Resampler::RatioMostlyFixed; + resamplerParameters.ratioChange = Resampler::SuddenRatioChange; + } + + m_resampler = std::unique_ptr + (new Resampler(resamplerParameters, m_parameters.channels)); +} + void R3Stretcher::calculateHop() { @@ -398,7 +393,9 @@ void R3Stretcher::reset() { m_calculator->reset(); - m_resampler->reset(); + if (m_resampler) { + m_resampler->reset(); + } for (auto &it : m_scaleData) { it.second->guided.reset(); @@ -463,12 +460,36 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) return; } - if (!isRealTime() && !m_keyFrameMap.empty()) { - if (m_mode == ProcessMode::Studying) { - m_totalTargetDuration = - size_t(round(m_studyInputDuration * getEffectiveRatio())); + if (!isRealTime()) { + + if (m_mode == ProcessMode::JustCreated || + m_mode == ProcessMode::Studying) { + + if (m_pitchScale != 1.0 && !m_resampler) { + createResampler(); + } + + // Pad to half of the longest frame. 
As with R2, in + // real-time mode we don't do this -- it's better to start + // with a swoosh than introduce more latency, and we don't + // want gaps when the ratio changes. + + int pad = m_guideConfiguration.longestFftSize / 2; + m_log.log(1, "offline mode: prefilling with", pad); + for (int c = 0; c < m_parameters.channels; ++c) { + m_channelData[c]->inbuf->zero(pad); + } + // By the time we skip this later we will have resampled + m_startSkip = int(round(pad / m_pitchScale)); + } + + if (!m_keyFrameMap.empty()) { + if (m_mode == ProcessMode::Studying) { + m_totalTargetDuration = + size_t(round(m_studyInputDuration * getEffectiveRatio())); + } + updateRatioFromMap(); } - updateRatioFromMap(); } if (final) { @@ -891,7 +912,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) double ratio = getEffectiveRatio(); - if (fabs(ratio - 1.0) < 1.0e-6) { + if (fabs(ratio - 1.0) < 1.0e-7) { ++m_unityCount; } else { m_unityCount = 0; @@ -907,6 +928,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) cd->nextSegmentation, v_mean(classifyScale->mag.data() + 1, classify/2), m_unityCount, + isRealTime(), cd->guidance); /* if (c == 0) { @@ -1070,7 +1092,7 @@ R3Stretcher::synthesiseChannel(int c, int outhop) scaleData->fft.inverse(scale->real.data(), scale->imag.data(), scale->timeDomain.data()); - + v_fftshift(scale->timeDomain.data(), fftSize); // Synthesis window may be shorter than analysis window, so diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index 161c2b0..3f9cd4f 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -310,6 +310,7 @@ protected: ProcessMode m_mode; void consume(); + void createResampler(); void calculateHop(); void updateRatioFromMap(); void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);