diff --git a/main/main.cpp b/main/main.cpp index b13e721..6198e22 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -500,11 +500,12 @@ int main(int argc, char **argv) if (shortwin) options |= RubberBandStretcher::OptionWindowShort; if (smoothing) options |= RubberBandStretcher::OptionSmoothingOn; if (formant) options |= RubberBandStretcher::OptionFormantPreserved; - if (hqpitch) options |= RubberBandStretcher::OptionPitchHighQuality; if (together) options |= RubberBandStretcher::OptionChannelsTogether; if (freqOrPitchMapSpecified) { options |= RubberBandStretcher::OptionPitchHighConsistency; + } else if (hqpitch) { + options |= RubberBandStretcher::OptionPitchHighQuality; } switch (threading) { @@ -647,13 +648,13 @@ int main(int argc, char **argv) int thisBlockSize = ibs; while (freqMapItr != freqMap.end()) { - size_t nextFreqFrame = freqMapItr->first + ts.getLatency(); + size_t nextFreqFrame = freqMapItr->first; // + ts.getLatency(); if (nextFreqFrame <= countIn) { double s = frequencyshift * freqMapItr->second; if (debug > 0) { cerr << "at frame " << countIn << " (requested at " << freqMapItr->first - << " plus latency " << ts.getLatency() + << " [NOT] plus latency " << ts.getLatency() << ") updating frequency ratio to " << s << endl; } ts.setPitchScale(s); diff --git a/src/StretchCalculator.cpp b/src/StretchCalculator.cpp index 2886ec3..a6d1397 100644 --- a/src/StretchCalculator.cpp +++ b/src/StretchCalculator.cpp @@ -44,9 +44,13 @@ StretchCalculator::StretchCalculator(size_t sampleRate, m_divergence(0), m_recovery(0), m_prevRatio(1.0), + m_prevTimeRatio(1.0), m_transientAmnesty(0), m_debugLevel(0), - m_useHardPeaks(useHardPeaks) + m_useHardPeaks(useHardPeaks), + m_inFrameCounter(0), + m_frameCheckpoint(0, 0), + m_outFrameCounter(0) { // std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl; } @@ -318,18 +322,107 @@ StretchCalculator::mapPeaks(std::vector &peaks, } } -int -StretchCalculator::calculateSingle(double ratio, - float df, - size_t increment) +int64_t +StretchCalculator::expectedOutFrame(int64_t inFrame, double timeRatio) { + int64_t checkpointedAt = m_frameCheckpoint.first; + int64_t checkpointed = m_frameCheckpoint.second; + return int64_t(round(checkpointed + (inFrame - checkpointedAt) * timeRatio)); +} + +int +StretchCalculator::calculateSingle(double timeRatio, + double effectivePitchRatio, + float df, + size_t inIncrement, + size_t analysisWindowSize, + size_t synthesisWindowSize) +{ + double ratio = timeRatio / effectivePitchRatio; + + int increment = int(inIncrement); if (increment == 0) increment = m_increment; + int outIncrement = lrint(increment * ratio); // the normal case bool isTransient = false; - + // We want to ensure, as close as possible, that the phase reset - // points appear at _exactly_ the right audio frame numbers. + // points appear at the right audio frame numbers. To this end we + // track the incoming frame number, its corresponding expected + // output frame number, and the actual output frame number + // projected based on the ratios provided. + // + // There are two subtleties: + // + // (1) on a ratio change, we need to checkpoint the expected + // output frame number reached so far and start counting again + // with the new ratio. We could do this with a reset to zero, but + // it's easier to reason about absolute input/output frame + // matches, so for the moment at least we're doing this by + // explicitly checkpointing the current numbers (hence the use of + // the above expectedOutFrame() function which refers to the + // last checkpointed values). + // + // (2) in the case of a pitch shift in a configuration where + // resampling occurs after stretching, all of our output + // increments will be effectively modified by resampling after we + // return. This is why we separate out timeRatio and + // effectivePitchRatio arguments - the former is the ratio that + // has already been applied and the latter is the ratio that will + // be applied by any subsequent resampling step (which will be 1.0 + // / pitchScale if resampling is happening after stretching). So + // the overall ratio is timeRatio / effectivePitchRatio. + bool ratioChanged = (ratio != m_prevRatio); + if (ratioChanged) { + // Reset our frame counters from the ratio change. + + // m_outFrameCounter tracks the frames counted at output from + // this function, which normally precedes resampling - hence + // the use of timeRatio rather than ratio here + + if (m_debugLevel > 1) { + std::cerr << "StretchCalculator: ratio changed from " << m_prevRatio << " to " << ratio << std::endl; + } + + int64_t toCheckpoint = expectedOutFrame + (m_inFrameCounter, m_prevTimeRatio); + m_frameCheckpoint = + std::pair(m_inFrameCounter, toCheckpoint); + } + + m_prevRatio = ratio; + m_prevTimeRatio = timeRatio; + + if (m_debugLevel > 2) { + std::cerr << "StretchCalculator::calculateSingle: timeRatio = " + << timeRatio << ", effectivePitchRatio = " + << effectivePitchRatio << " (that's 1.0 / " + << (1.0 / effectivePitchRatio) + << "), ratio = " << ratio << ", df = " << df + << ", inIncrement = " << inIncrement + << ", default outIncrement = " << outIncrement + << ", analysisWindowSize = " << analysisWindowSize + << ", synthesisWindowSize = " << synthesisWindowSize + << std::endl; + + std::cerr << "inFrameCounter = " << m_inFrameCounter + << ", outFrameCounter = " << m_outFrameCounter + << std::endl; + + std::cerr << "The next sample out is input sample " << m_inFrameCounter << std::endl; + } + + int64_t intended = expectedOutFrame + (m_inFrameCounter + analysisWindowSize/4, timeRatio); + int64_t projected = int64_t + (round(m_outFrameCounter + (synthesisWindowSize/4 * effectivePitchRatio))); + m_divergence = projected - intended; + + if (m_debugLevel > 2) { + std::cerr << "for current frame + quarter frame: intended " << intended << ", projected " << projected << ", divergence " << m_divergence << std::endl; + } + // In principle, the threshold depends on chunk size: larger chunk // sizes need higher thresholds. Since chunk size depends on // ratio, I suppose we could in theory calculate the threshold @@ -350,53 +443,81 @@ StretchCalculator::calculateSingle(double ratio, m_prevDf = df; - bool ratioChanged = (ratio != m_prevRatio); - m_prevRatio = ratio; - - if (isTransient && m_transientAmnesty == 0) { - if (m_debugLevel > 1) { - std::cerr << "StretchCalculator::calculateSingle: transient (df " << df << ", threshold " << transientThreshold << ")" << std::endl; + if (m_transientAmnesty > 0) { + if (isTransient) { + if (m_debugLevel > 1) { + std::cerr << "StretchCalculator::calculateSingle: transient, but we have an amnesty (df " << df << ", threshold " << transientThreshold << ")" << std::endl; + } + isTransient = false; + } + --m_transientAmnesty; + } + + if (isTransient) { + if (m_debugLevel > 1) { + std::cerr << "StretchCalculator::calculateSingle: transient at (df " << df << ", threshold " << transientThreshold << ")" << std::endl; } - m_divergence += increment - (increment * ratio); // as in offline mode, 0.05 sec approx min between transients m_transientAmnesty = lrint(ceil(double(m_sampleRate) / (20 * double(increment)))); m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment); - return -int(increment); + + outIncrement = increment; + + } else { + + if (ratioChanged) { + m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment); + } + + int incr = lrint(outIncrement - m_recovery); + if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) { + std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", "; + } + if (incr < lrint((increment * ratio) / 2)) { + incr = lrint((increment * ratio) / 2); + } else if (incr > lrint(increment * ratio * 2)) { + incr = lrint(increment * ratio * 2); + } + + double divdiff = (increment * ratio) - incr; + + if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) { + std::cerr << "possibly clamped to " << incr << ", divdiff = " << divdiff << std::endl; + } + + double prevDivergence = m_divergence; + m_divergence -= divdiff; + if ((prevDivergence < 0 && m_divergence > 0) || + (prevDivergence > 0 && m_divergence < 0)) { + m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment); + } + + if (incr < 0) { + std::cerr << "WARNING: internal error: incr < 0 in calculateSingle" + << std::endl; + outIncrement = 0; + } else { + outIncrement = incr; + } } - if (ratioChanged) { - m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment); + if (m_debugLevel > 1) { + std::cerr << "StretchCalculator::calculateSingle: returning isTransient = " + << isTransient << ", outIncrement = " << outIncrement + << std::endl; } - if (m_transientAmnesty > 0) --m_transientAmnesty; - - int incr = lrint(increment * ratio - m_recovery); - if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) { - std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", "; + m_inFrameCounter += inIncrement; + m_outFrameCounter += outIncrement * effectivePitchRatio; + + if (isTransient) { + return -outIncrement; + } else { + return outIncrement; } - if (incr < lrint((increment * ratio) / 2)) { - incr = lrint((increment * ratio) / 2); - } else if (incr > lrint(increment * ratio * 2)) { - incr = lrint(increment * ratio * 2); - } - - double divdiff = (increment * ratio) - incr; - - if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) { - std::cerr << "divdiff = " << divdiff << std::endl; - } - - double prevDivergence = m_divergence; - m_divergence -= divdiff; - if ((prevDivergence < 0 && m_divergence > 0) || - (prevDivergence > 0 && m_divergence < 0)) { - m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment); - } - - return incr; } void diff --git a/src/StretchCalculator.h b/src/StretchCalculator.h index fc3822d..4f05e3e 100644 --- a/src/StretchCalculator.h +++ b/src/StretchCalculator.h @@ -68,8 +68,12 @@ public: * If increment is non-zero, use it for the input increment for * this block in preference to m_increment. */ - int calculateSingle(double ratio, float curveValue, - size_t increment = 0); + int calculateSingle(double timeRatio, + double effectivePitchRatio, + float curveValue, + size_t increment, + size_t analysisWindowSize, + size_t synthesisWindowSize); void setUseHardPeaks(bool use) { m_useHardPeaks = use; } @@ -105,11 +109,16 @@ protected: size_t m_increment; float m_prevDf; double m_divergence; - float m_recovery; - float m_prevRatio; + double m_recovery; + double m_prevRatio; + double m_prevTimeRatio; int m_transientAmnesty; // only in RT mode; handled differently offline int m_debugLevel; bool m_useHardPeaks; + int64_t m_inFrameCounter; + std::pair m_frameCheckpoint; + int64_t expectedOutFrame(int64_t inFrame, double timeRatio); + double m_outFrameCounter; std::map m_keyFrameMap; std::vector m_peaks; diff --git a/src/StretcherImpl.cpp b/src/StretcherImpl.cpp index ca5b52a..cc7beb6 100644 --- a/src/StretcherImpl.cpp +++ b/src/StretcherImpl.cpp @@ -738,11 +738,12 @@ RubberBandStretcher::Impl::configure() // number of onset detector chunks will be the number of audio // samples input, divided by the input increment, plus one. + //!!! // In real-time mode, we don't do this prefill -- it's better to // start with a swoosh than introduce more latency, and we don't // want gaps when the ratio changes. - if (!m_realtime) { +// if (!m_realtime) { if (m_debugLevel > 1) { cerr << "Not real time mode: prefilling" << endl; } @@ -750,7 +751,7 @@ RubberBandStretcher::Impl::configure() m_channelData[c]->reset(); m_channelData[c]->inbuf->zero(m_aWindowSize/2); } - } +// } } @@ -777,6 +778,8 @@ RubberBandStretcher::Impl::reconfigure() calculateSizes(); + bool somethingChanged = false; + // There are various allocations in this function, but they should // never happen in normal use -- they just recover from the case // where not all of the things we need were correctly created when @@ -811,12 +814,15 @@ RubberBandStretcher::Impl::reconfigure() m_channelData[c]->setSizes(std::max(m_aWindowSize, m_sWindowSize), m_fftSize); } + + somethingChanged = true; } if (m_outbufSize != prevOutbufSize) { for (size_t c = 0; c < m_channels; ++c) { m_channelData[c]->setOutbufSize(m_outbufSize); } + somethingChanged = true; } if (m_pitchScale != 1.0) { @@ -839,11 +845,22 @@ RubberBandStretcher::Impl::reconfigure() lrintf(ceil((m_increment * m_timeRatio * 2) / m_pitchScale)); if (rbs < m_increment * 16) rbs = m_increment * 16; m_channelData[c]->setResampleBufSize(rbs); + + somethingChanged = true; } } if (m_fftSize != prevFftSize) { m_phaseResetAudioCurve->setFftSize(m_fftSize); + somethingChanged = true; + } + + if (m_debugLevel > 0) { + if (somethingChanged) { + std::cerr << "reconfigure: at least one parameter changed" << std::endl; + } else { + std::cerr << "reconfigure: nothing changed" << std::endl; + } } } diff --git a/src/StretcherProcess.cpp b/src/StretcherProcess.cpp index b8f5a49..fdc697e 100644 --- a/src/StretcherProcess.cpp +++ b/src/StretcherProcess.cpp @@ -616,8 +616,14 @@ RubberBandStretcher::Impl::calculateIncrements(size_t &phaseIncrementRtn, } } + double effectivePitchRatio = 1.0 / m_pitchScale; + if (cd.resampler) { + effectivePitchRatio = cd.resampler->getEffectiveRatio(effectivePitchRatio); + } + int incr = m_stretchCalculator->calculateSingle - (getEffectiveRatio(), df, m_increment); + (m_timeRatio, effectivePitchRatio, df, m_increment, + m_aWindowSize, m_sWindowSize); if (m_lastProcessPhaseResetDf.getWriteSpace() > 0) { m_lastProcessPhaseResetDf.write(&df, 1); @@ -1142,11 +1148,13 @@ RubberBandStretcher::Impl::writeOutput(RingBuffer &to, float *from, size_ // samples, because the first chunk is centred on the start of the // output. In RT mode we didn't apply any pre-padding in // configure(), so we don't want to remove any here. - +//!!! size_t startSkip = 0; - if (!m_realtime) { +// if (!m_realtime) { + //!!! lock down the latency to this initial value in RT mode startSkip = lrintf((m_sWindowSize/2) / m_pitchScale); - } +// startSkip = m_sWindowSize/2; +// } if (outCount > startSkip) { diff --git a/src/dsp/BQResampler.cpp b/src/dsp/BQResampler.cpp index 2ec624e..0804a28 100644 --- a/src/dsp/BQResampler.cpp +++ b/src/dsp/BQResampler.cpp @@ -220,6 +220,15 @@ BQResampler::resampleInterleaved(float *const out, return o / m_channels; } +double +BQResampler::getEffectiveRatio(double ratio) const { + if (m_initialised && ratio == m_s->parameters.ratio) { + return m_s->parameters.effective; + } else { + return pick_params(ratio).effective; + } +} + int BQResampler::gcd(int a, int b) const { diff --git a/src/dsp/BQResampler.h b/src/dsp/BQResampler.h index c05af1d..9ca0603 100644 --- a/src/dsp/BQResampler.h +++ b/src/dsp/BQResampler.h @@ -60,6 +60,8 @@ public: const float *const in, int incount, double ratio, bool final); + double getEffectiveRatio(double ratio) const; + void reset(); private: diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index dda917d..13336f5 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -99,6 +99,7 @@ public: bool final) = 0; virtual int getChannelCount() const = 0; + virtual double getEffectiveRatio(double ratio) const = 0; virtual void reset() = 0; }; @@ -130,6 +131,7 @@ public: bool final = false); int getChannelCount() const { return m_channels; } + double getEffectiveRatio(double ratio) const { return ratio; } void reset(); @@ -561,6 +563,7 @@ public: bool final = false); int getChannelCount() const { return m_channels; } + double getEffectiveRatio(double ratio) const { return ratio; } void reset(); @@ -785,6 +788,7 @@ public: bool final); int getChannelCount() const { return m_channels; } + double getEffectiveRatio(double ratio) const { return ratio; } void reset(); @@ -970,7 +974,13 @@ public: double ratio, bool final = false); - int getChannelCount() const { return m_channels; } + int getChannelCount() const { + return m_channels; + } + + double getEffectiveRatio(double ratio) const { + return m_resampler->getEffectiveRatio(ratio); + } void reset(); @@ -1121,6 +1131,7 @@ public: bool final = false); int getChannelCount() const { return m_channels; } + double getEffectiveRatio(double ratio) const { return ratio; } void reset(); @@ -1545,6 +1556,12 @@ Resampler::getChannelCount() const return d->getChannelCount(); } +double +Resampler::getEffectiveRatio(double ratio) const +{ + return d->getEffectiveRatio(ratio); +} + void Resampler::reset() { diff --git a/src/dsp/Resampler.h b/src/dsp/Resampler.h index 1bb28a8..775fda7 100644 --- a/src/dsp/Resampler.h +++ b/src/dsp/Resampler.h @@ -148,6 +148,8 @@ public: int getChannelCount() const; + double getEffectiveRatio(double ratio) const; + void reset(); class Impl;