Toward more accurate timing in the face of varying pitch ratio
This commit is contained in:
@@ -500,11 +500,12 @@ int main(int argc, char **argv)
|
||||
if (shortwin) options |= RubberBandStretcher::OptionWindowShort;
|
||||
if (smoothing) options |= RubberBandStretcher::OptionSmoothingOn;
|
||||
if (formant) options |= RubberBandStretcher::OptionFormantPreserved;
|
||||
if (hqpitch) options |= RubberBandStretcher::OptionPitchHighQuality;
|
||||
if (together) options |= RubberBandStretcher::OptionChannelsTogether;
|
||||
|
||||
if (freqOrPitchMapSpecified) {
|
||||
options |= RubberBandStretcher::OptionPitchHighConsistency;
|
||||
} else if (hqpitch) {
|
||||
options |= RubberBandStretcher::OptionPitchHighQuality;
|
||||
}
|
||||
|
||||
switch (threading) {
|
||||
@@ -647,13 +648,13 @@ int main(int argc, char **argv)
|
||||
int thisBlockSize = ibs;
|
||||
|
||||
while (freqMapItr != freqMap.end()) {
|
||||
size_t nextFreqFrame = freqMapItr->first + ts.getLatency();
|
||||
size_t nextFreqFrame = freqMapItr->first; // + ts.getLatency();
|
||||
if (nextFreqFrame <= countIn) {
|
||||
double s = frequencyshift * freqMapItr->second;
|
||||
if (debug > 0) {
|
||||
cerr << "at frame " << countIn
|
||||
<< " (requested at " << freqMapItr->first
|
||||
<< " plus latency " << ts.getLatency()
|
||||
<< " [NOT] plus latency " << ts.getLatency()
|
||||
<< ") updating frequency ratio to " << s << endl;
|
||||
}
|
||||
ts.setPitchScale(s);
|
||||
|
||||
@@ -44,9 +44,13 @@ StretchCalculator::StretchCalculator(size_t sampleRate,
|
||||
m_divergence(0),
|
||||
m_recovery(0),
|
||||
m_prevRatio(1.0),
|
||||
m_prevTimeRatio(1.0),
|
||||
m_transientAmnesty(0),
|
||||
m_debugLevel(0),
|
||||
m_useHardPeaks(useHardPeaks)
|
||||
m_useHardPeaks(useHardPeaks),
|
||||
m_inFrameCounter(0),
|
||||
m_frameCheckpoint(0, 0),
|
||||
m_outFrameCounter(0)
|
||||
{
|
||||
// std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
|
||||
}
|
||||
@@ -318,17 +322,106 @@ StretchCalculator::mapPeaks(std::vector<Peak> &peaks,
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
StretchCalculator::calculateSingle(double ratio,
|
||||
float df,
|
||||
size_t increment)
|
||||
int64_t
|
||||
StretchCalculator::expectedOutFrame(int64_t inFrame, double timeRatio)
|
||||
{
|
||||
int64_t checkpointedAt = m_frameCheckpoint.first;
|
||||
int64_t checkpointed = m_frameCheckpoint.second;
|
||||
return int64_t(round(checkpointed + (inFrame - checkpointedAt) * timeRatio));
|
||||
}
|
||||
|
||||
int
|
||||
StretchCalculator::calculateSingle(double timeRatio,
|
||||
double effectivePitchRatio,
|
||||
float df,
|
||||
size_t inIncrement,
|
||||
size_t analysisWindowSize,
|
||||
size_t synthesisWindowSize)
|
||||
{
|
||||
double ratio = timeRatio / effectivePitchRatio;
|
||||
|
||||
int increment = int(inIncrement);
|
||||
if (increment == 0) increment = m_increment;
|
||||
|
||||
int outIncrement = lrint(increment * ratio); // the normal case
|
||||
bool isTransient = false;
|
||||
|
||||
// We want to ensure, as close as possible, that the phase reset
|
||||
// points appear at _exactly_ the right audio frame numbers.
|
||||
// points appear at the right audio frame numbers. To this end we
|
||||
// track the incoming frame number, its corresponding expected
|
||||
// output frame number, and the actual output frame number
|
||||
// projected based on the ratios provided.
|
||||
//
|
||||
// There are two subtleties:
|
||||
//
|
||||
// (1) on a ratio change, we need to checkpoint the expected
|
||||
// output frame number reached so far and start counting again
|
||||
// with the new ratio. We could do this with a reset to zero, but
|
||||
// it's easier to reason about absolute input/output frame
|
||||
// matches, so for the moment at least we're doing this by
|
||||
// explicitly checkpointing the current numbers (hence the use of
|
||||
// the above expectedOutFrame() function which refers to the
|
||||
// last checkpointed values).
|
||||
//
|
||||
// (2) in the case of a pitch shift in a configuration where
|
||||
// resampling occurs after stretching, all of our output
|
||||
// increments will be effectively modified by resampling after we
|
||||
// return. This is why we separate out timeRatio and
|
||||
// effectivePitchRatio arguments - the former is the ratio that
|
||||
// has already been applied and the latter is the ratio that will
|
||||
// be applied by any subsequent resampling step (which will be 1.0
|
||||
// / pitchScale if resampling is happening after stretching). So
|
||||
// the overall ratio is timeRatio / effectivePitchRatio.
|
||||
|
||||
bool ratioChanged = (ratio != m_prevRatio);
|
||||
if (ratioChanged) {
|
||||
// Reset our frame counters from the ratio change.
|
||||
|
||||
// m_outFrameCounter tracks the frames counted at output from
|
||||
// this function, which normally precedes resampling - hence
|
||||
// the use of timeRatio rather than ratio here
|
||||
|
||||
if (m_debugLevel > 1) {
|
||||
std::cerr << "StretchCalculator: ratio changed from " << m_prevRatio << " to " << ratio << std::endl;
|
||||
}
|
||||
|
||||
int64_t toCheckpoint = expectedOutFrame
|
||||
(m_inFrameCounter, m_prevTimeRatio);
|
||||
m_frameCheckpoint =
|
||||
std::pair<int64_t, int64_t>(m_inFrameCounter, toCheckpoint);
|
||||
}
|
||||
|
||||
m_prevRatio = ratio;
|
||||
m_prevTimeRatio = timeRatio;
|
||||
|
||||
if (m_debugLevel > 2) {
|
||||
std::cerr << "StretchCalculator::calculateSingle: timeRatio = "
|
||||
<< timeRatio << ", effectivePitchRatio = "
|
||||
<< effectivePitchRatio << " (that's 1.0 / "
|
||||
<< (1.0 / effectivePitchRatio)
|
||||
<< "), ratio = " << ratio << ", df = " << df
|
||||
<< ", inIncrement = " << inIncrement
|
||||
<< ", default outIncrement = " << outIncrement
|
||||
<< ", analysisWindowSize = " << analysisWindowSize
|
||||
<< ", synthesisWindowSize = " << synthesisWindowSize
|
||||
<< std::endl;
|
||||
|
||||
std::cerr << "inFrameCounter = " << m_inFrameCounter
|
||||
<< ", outFrameCounter = " << m_outFrameCounter
|
||||
<< std::endl;
|
||||
|
||||
std::cerr << "The next sample out is input sample " << m_inFrameCounter << std::endl;
|
||||
}
|
||||
|
||||
int64_t intended = expectedOutFrame
|
||||
(m_inFrameCounter + analysisWindowSize/4, timeRatio);
|
||||
int64_t projected = int64_t
|
||||
(round(m_outFrameCounter + (synthesisWindowSize/4 * effectivePitchRatio)));
|
||||
m_divergence = projected - intended;
|
||||
|
||||
if (m_debugLevel > 2) {
|
||||
std::cerr << "for current frame + quarter frame: intended " << intended << ", projected " << projected << ", divergence " << m_divergence << std::endl;
|
||||
}
|
||||
|
||||
// In principle, the threshold depends on chunk size: larger chunk
|
||||
// sizes need higher thresholds. Since chunk size depends on
|
||||
@@ -350,53 +443,81 @@ StretchCalculator::calculateSingle(double ratio,
|
||||
|
||||
m_prevDf = df;
|
||||
|
||||
bool ratioChanged = (ratio != m_prevRatio);
|
||||
m_prevRatio = ratio;
|
||||
|
||||
if (isTransient && m_transientAmnesty == 0) {
|
||||
if (m_debugLevel > 1) {
|
||||
std::cerr << "StretchCalculator::calculateSingle: transient (df " << df << ", threshold " << transientThreshold << ")" << std::endl;
|
||||
if (m_transientAmnesty > 0) {
|
||||
if (isTransient) {
|
||||
if (m_debugLevel > 1) {
|
||||
std::cerr << "StretchCalculator::calculateSingle: transient, but we have an amnesty (df " << df << ", threshold " << transientThreshold << ")" << std::endl;
|
||||
}
|
||||
isTransient = false;
|
||||
}
|
||||
--m_transientAmnesty;
|
||||
}
|
||||
|
||||
if (isTransient) {
|
||||
if (m_debugLevel > 1) {
|
||||
std::cerr << "StretchCalculator::calculateSingle: transient at (df " << df << ", threshold " << transientThreshold << ")" << std::endl;
|
||||
}
|
||||
m_divergence += increment - (increment * ratio);
|
||||
|
||||
// as in offline mode, 0.05 sec approx min between transients
|
||||
m_transientAmnesty =
|
||||
lrint(ceil(double(m_sampleRate) / (20 * double(increment))));
|
||||
|
||||
m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment);
|
||||
return -int(increment);
|
||||
|
||||
outIncrement = increment;
|
||||
|
||||
} else {
|
||||
|
||||
if (ratioChanged) {
|
||||
m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment);
|
||||
}
|
||||
|
||||
int incr = lrint(outIncrement - m_recovery);
|
||||
if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
|
||||
std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", ";
|
||||
}
|
||||
if (incr < lrint((increment * ratio) / 2)) {
|
||||
incr = lrint((increment * ratio) / 2);
|
||||
} else if (incr > lrint(increment * ratio * 2)) {
|
||||
incr = lrint(increment * ratio * 2);
|
||||
}
|
||||
|
||||
double divdiff = (increment * ratio) - incr;
|
||||
|
||||
if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
|
||||
std::cerr << "possibly clamped to " << incr << ", divdiff = " << divdiff << std::endl;
|
||||
}
|
||||
|
||||
double prevDivergence = m_divergence;
|
||||
m_divergence -= divdiff;
|
||||
if ((prevDivergence < 0 && m_divergence > 0) ||
|
||||
(prevDivergence > 0 && m_divergence < 0)) {
|
||||
m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment);
|
||||
}
|
||||
|
||||
if (incr < 0) {
|
||||
std::cerr << "WARNING: internal error: incr < 0 in calculateSingle"
|
||||
<< std::endl;
|
||||
outIncrement = 0;
|
||||
} else {
|
||||
outIncrement = incr;
|
||||
}
|
||||
}
|
||||
|
||||
if (ratioChanged) {
|
||||
m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment);
|
||||
if (m_debugLevel > 1) {
|
||||
std::cerr << "StretchCalculator::calculateSingle: returning isTransient = "
|
||||
<< isTransient << ", outIncrement = " << outIncrement
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
if (m_transientAmnesty > 0) --m_transientAmnesty;
|
||||
m_inFrameCounter += inIncrement;
|
||||
m_outFrameCounter += outIncrement * effectivePitchRatio;
|
||||
|
||||
int incr = lrint(increment * ratio - m_recovery);
|
||||
if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
|
||||
std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", ";
|
||||
if (isTransient) {
|
||||
return -outIncrement;
|
||||
} else {
|
||||
return outIncrement;
|
||||
}
|
||||
if (incr < lrint((increment * ratio) / 2)) {
|
||||
incr = lrint((increment * ratio) / 2);
|
||||
} else if (incr > lrint(increment * ratio * 2)) {
|
||||
incr = lrint(increment * ratio * 2);
|
||||
}
|
||||
|
||||
double divdiff = (increment * ratio) - incr;
|
||||
|
||||
if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
|
||||
std::cerr << "divdiff = " << divdiff << std::endl;
|
||||
}
|
||||
|
||||
double prevDivergence = m_divergence;
|
||||
m_divergence -= divdiff;
|
||||
if ((prevDivergence < 0 && m_divergence > 0) ||
|
||||
(prevDivergence > 0 && m_divergence < 0)) {
|
||||
m_recovery = m_divergence / ((m_sampleRate / 10.0) / increment);
|
||||
}
|
||||
|
||||
return incr;
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -68,8 +68,12 @@ public:
|
||||
* If increment is non-zero, use it for the input increment for
|
||||
* this block in preference to m_increment.
|
||||
*/
|
||||
int calculateSingle(double ratio, float curveValue,
|
||||
size_t increment = 0);
|
||||
int calculateSingle(double timeRatio,
|
||||
double effectivePitchRatio,
|
||||
float curveValue,
|
||||
size_t increment,
|
||||
size_t analysisWindowSize,
|
||||
size_t synthesisWindowSize);
|
||||
|
||||
void setUseHardPeaks(bool use) { m_useHardPeaks = use; }
|
||||
|
||||
@@ -105,11 +109,16 @@ protected:
|
||||
size_t m_increment;
|
||||
float m_prevDf;
|
||||
double m_divergence;
|
||||
float m_recovery;
|
||||
float m_prevRatio;
|
||||
double m_recovery;
|
||||
double m_prevRatio;
|
||||
double m_prevTimeRatio;
|
||||
int m_transientAmnesty; // only in RT mode; handled differently offline
|
||||
int m_debugLevel;
|
||||
bool m_useHardPeaks;
|
||||
int64_t m_inFrameCounter;
|
||||
std::pair<int64_t, int64_t> m_frameCheckpoint;
|
||||
int64_t expectedOutFrame(int64_t inFrame, double timeRatio);
|
||||
double m_outFrameCounter;
|
||||
|
||||
std::map<size_t, size_t> m_keyFrameMap;
|
||||
std::vector<Peak> m_peaks;
|
||||
|
||||
@@ -738,11 +738,12 @@ RubberBandStretcher::Impl::configure()
|
||||
// number of onset detector chunks will be the number of audio
|
||||
// samples input, divided by the input increment, plus one.
|
||||
|
||||
//!!!
|
||||
// In real-time mode, we don't do this prefill -- it's better to
|
||||
// start with a swoosh than introduce more latency, and we don't
|
||||
// want gaps when the ratio changes.
|
||||
|
||||
if (!m_realtime) {
|
||||
// if (!m_realtime) {
|
||||
if (m_debugLevel > 1) {
|
||||
cerr << "Not real time mode: prefilling" << endl;
|
||||
}
|
||||
@@ -750,7 +751,7 @@ RubberBandStretcher::Impl::configure()
|
||||
m_channelData[c]->reset();
|
||||
m_channelData[c]->inbuf->zero(m_aWindowSize/2);
|
||||
}
|
||||
}
|
||||
// }
|
||||
}
|
||||
|
||||
|
||||
@@ -777,6 +778,8 @@ RubberBandStretcher::Impl::reconfigure()
|
||||
|
||||
calculateSizes();
|
||||
|
||||
bool somethingChanged = false;
|
||||
|
||||
// There are various allocations in this function, but they should
|
||||
// never happen in normal use -- they just recover from the case
|
||||
// where not all of the things we need were correctly created when
|
||||
@@ -811,12 +814,15 @@ RubberBandStretcher::Impl::reconfigure()
|
||||
m_channelData[c]->setSizes(std::max(m_aWindowSize, m_sWindowSize),
|
||||
m_fftSize);
|
||||
}
|
||||
|
||||
somethingChanged = true;
|
||||
}
|
||||
|
||||
if (m_outbufSize != prevOutbufSize) {
|
||||
for (size_t c = 0; c < m_channels; ++c) {
|
||||
m_channelData[c]->setOutbufSize(m_outbufSize);
|
||||
}
|
||||
somethingChanged = true;
|
||||
}
|
||||
|
||||
if (m_pitchScale != 1.0) {
|
||||
@@ -839,11 +845,22 @@ RubberBandStretcher::Impl::reconfigure()
|
||||
lrintf(ceil((m_increment * m_timeRatio * 2) / m_pitchScale));
|
||||
if (rbs < m_increment * 16) rbs = m_increment * 16;
|
||||
m_channelData[c]->setResampleBufSize(rbs);
|
||||
|
||||
somethingChanged = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (m_fftSize != prevFftSize) {
|
||||
m_phaseResetAudioCurve->setFftSize(m_fftSize);
|
||||
somethingChanged = true;
|
||||
}
|
||||
|
||||
if (m_debugLevel > 0) {
|
||||
if (somethingChanged) {
|
||||
std::cerr << "reconfigure: at least one parameter changed" << std::endl;
|
||||
} else {
|
||||
std::cerr << "reconfigure: nothing changed" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -616,8 +616,14 @@ RubberBandStretcher::Impl::calculateIncrements(size_t &phaseIncrementRtn,
|
||||
}
|
||||
}
|
||||
|
||||
double effectivePitchRatio = 1.0 / m_pitchScale;
|
||||
if (cd.resampler) {
|
||||
effectivePitchRatio = cd.resampler->getEffectiveRatio(effectivePitchRatio);
|
||||
}
|
||||
|
||||
int incr = m_stretchCalculator->calculateSingle
|
||||
(getEffectiveRatio(), df, m_increment);
|
||||
(m_timeRatio, effectivePitchRatio, df, m_increment,
|
||||
m_aWindowSize, m_sWindowSize);
|
||||
|
||||
if (m_lastProcessPhaseResetDf.getWriteSpace() > 0) {
|
||||
m_lastProcessPhaseResetDf.write(&df, 1);
|
||||
@@ -1142,11 +1148,13 @@ RubberBandStretcher::Impl::writeOutput(RingBuffer<float> &to, float *from, size_
|
||||
// samples, because the first chunk is centred on the start of the
|
||||
// output. In RT mode we didn't apply any pre-padding in
|
||||
// configure(), so we don't want to remove any here.
|
||||
|
||||
//!!!
|
||||
size_t startSkip = 0;
|
||||
if (!m_realtime) {
|
||||
// if (!m_realtime) {
|
||||
//!!! lock down the latency to this initial value in RT mode
|
||||
startSkip = lrintf((m_sWindowSize/2) / m_pitchScale);
|
||||
}
|
||||
// startSkip = m_sWindowSize/2;
|
||||
// }
|
||||
|
||||
if (outCount > startSkip) {
|
||||
|
||||
|
||||
@@ -220,6 +220,15 @@ BQResampler::resampleInterleaved(float *const out,
|
||||
return o / m_channels;
|
||||
}
|
||||
|
||||
double
|
||||
BQResampler::getEffectiveRatio(double ratio) const {
|
||||
if (m_initialised && ratio == m_s->parameters.ratio) {
|
||||
return m_s->parameters.effective;
|
||||
} else {
|
||||
return pick_params(ratio).effective;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
BQResampler::gcd(int a, int b) const
|
||||
{
|
||||
|
||||
@@ -60,6 +60,8 @@ public:
|
||||
const float *const in, int incount,
|
||||
double ratio, bool final);
|
||||
|
||||
double getEffectiveRatio(double ratio) const;
|
||||
|
||||
void reset();
|
||||
|
||||
private:
|
||||
|
||||
@@ -99,6 +99,7 @@ public:
|
||||
bool final) = 0;
|
||||
|
||||
virtual int getChannelCount() const = 0;
|
||||
virtual double getEffectiveRatio(double ratio) const = 0;
|
||||
|
||||
virtual void reset() = 0;
|
||||
};
|
||||
@@ -130,6 +131,7 @@ public:
|
||||
bool final = false);
|
||||
|
||||
int getChannelCount() const { return m_channels; }
|
||||
// Identity mapping: the requested ratio is returned unchanged.
double getEffectiveRatio(double ratio) const { return ratio; }
|
||||
|
||||
void reset();
|
||||
|
||||
@@ -561,6 +563,7 @@ public:
|
||||
bool final = false);
|
||||
|
||||
int getChannelCount() const { return m_channels; }
|
||||
// Identity mapping: the requested ratio is returned unchanged.
double getEffectiveRatio(double ratio) const { return ratio; }
|
||||
|
||||
void reset();
|
||||
|
||||
@@ -785,6 +788,7 @@ public:
|
||||
bool final);
|
||||
|
||||
int getChannelCount() const { return m_channels; }
|
||||
// Identity mapping: the requested ratio is returned unchanged.
double getEffectiveRatio(double ratio) const { return ratio; }
|
||||
|
||||
void reset();
|
||||
|
||||
@@ -970,7 +974,13 @@ public:
|
||||
double ratio,
|
||||
bool final = false);
|
||||
|
||||
int getChannelCount() const { return m_channels; }
|
||||
int getChannelCount() const {
|
||||
return m_channels;
|
||||
}
|
||||
|
||||
double getEffectiveRatio(double ratio) const {
|
||||
return m_resampler->getEffectiveRatio(ratio);
|
||||
}
|
||||
|
||||
void reset();
|
||||
|
||||
@@ -1121,6 +1131,7 @@ public:
|
||||
bool final = false);
|
||||
|
||||
int getChannelCount() const { return m_channels; }
|
||||
// Identity mapping: the requested ratio is returned unchanged.
double getEffectiveRatio(double ratio) const { return ratio; }
|
||||
|
||||
void reset();
|
||||
|
||||
@@ -1545,6 +1556,12 @@ Resampler::getChannelCount() const
|
||||
return d->getChannelCount();
|
||||
}
|
||||
|
||||
double
|
||||
Resampler::getEffectiveRatio(double ratio) const
|
||||
{
|
||||
return d->getEffectiveRatio(ratio);
|
||||
}
|
||||
|
||||
void
|
||||
Resampler::reset()
|
||||
{
|
||||
|
||||
@@ -148,6 +148,8 @@ public:
|
||||
|
||||
int getChannelCount() const;
|
||||
|
||||
double getEffectiveRatio(double ratio) const;
|
||||
|
||||
void reset();
|
||||
|
||||
class Impl;
|
||||
|
||||
Reference in New Issue
Block a user