Update from personal repository.

* Added an initial "formant preservation" option when pitch shifting
 * Real-time pitch shifting now uses a faster method by default, with
   less variation in CPU usage
 * The code is more amenable to compiler auto-vectorization (through
   e.g. gcc --ftree-vectorize).
This commit is contained in:
Chris Cannam
2008-05-22 16:54:27 +00:00
parent 52a10829ef
commit 1d41c952e8
51 changed files with 3157 additions and 676 deletions

View File

@@ -3,7 +3,7 @@
/*
Rubber Band
An audio time-stretching and pitch-shifting library.
Copyright 2007 Chris Cannam.
Copyright 2007-2008 Chris Cannam.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
@@ -19,11 +19,14 @@
#include "StretchCalculator.h"
#include "StretcherChannelData.h"
#include "Resampler.h"
#include "Profiler.h"
#include <cassert>
#include <cmath>
#include <set>
#include <map>
#include <deque>
using std::cerr;
using std::endl;
@@ -97,9 +100,82 @@ RubberBandStretcher::Impl::ProcessThread::abandon()
m_abandoning = true;
}
bool
RubberBandStretcher::Impl::resampleBeforeStretching() const
{
// We can't resample before stretching in offline mode, because
// the stretch calculation is based on doing it the other way
// around. It would take more work (and testing) to enable this.
if (!m_realtime) return false;
if (m_options & OptionPitchHighQuality) {
return (m_pitchScale < 1.0); // better sound
} else {
return (m_pitchScale > 1.0); // better performance
}
}
size_t
RubberBandStretcher::Impl::consumeChannel(size_t c, const float *input,
size_t samples, bool final)
{
Profiler profiler("RubberBandStretcher::Impl::consumeChannel");
ChannelData &cd = *m_channelData[c];
RingBuffer<float> &inbuf = *cd.inbuf;
size_t toWrite = samples;
size_t writable = inbuf.getWriteSpace();
bool resampling = resampleBeforeStretching();
if (resampling) {
toWrite = int(ceil(samples / m_pitchScale));
if (writable < toWrite) {
samples = int(floor(writable * m_pitchScale));
if (samples == 0) return 0;
}
size_t reqSize = int(ceil(samples / m_pitchScale));
if (reqSize > cd.resamplebufSize) {
cerr << "WARNING: RubberBandStretcher::Impl::consumeChannel: resizing resampler buffer from "
<< cd.resamplebufSize << " to " << reqSize << endl;
cd.setResampleBufSize(reqSize);
}
toWrite = cd.resampler->resample(&input,
&cd.resamplebuf,
samples,
1.0 / m_pitchScale,
final);
}
if (writable < toWrite) {
if (resampling) {
return 0;
}
toWrite = writable;
}
if (resampling) {
inbuf.write(cd.resamplebuf, toWrite);
cd.inCount += samples;
return samples;
} else {
inbuf.write(input, toWrite);
cd.inCount += toWrite;
return toWrite;
}
}
void
RubberBandStretcher::Impl::processChunks(size_t c, bool &any, bool &last)
{
Profiler profiler("RubberBandStretcher::Impl::processChunks");
// Process as many chunks as there are available on the input
// buffer for channel c. This requires that the increments have
// already been calculated.
@@ -140,6 +216,8 @@ RubberBandStretcher::Impl::processChunks(size_t c, bool &any, bool &last)
bool
RubberBandStretcher::Impl::processOneChunk()
{
Profiler profiler("RubberBandStretcher::Impl::processOneChunk");
// Process a single chunk for all channels, provided there is
// enough data on each channel for at least one chunk. This is
// able to calculate increments as it goes along.
@@ -173,6 +251,8 @@ RubberBandStretcher::Impl::processOneChunk()
bool
RubberBandStretcher::Impl::testInbufReadSpace(size_t c)
{
Profiler profiler("RubberBandStretcher::Impl::testInbufReadSpace");
ChannelData &cd = *m_channelData[c];
RingBuffer<float> &inbuf = *cd.inbuf;
@@ -223,6 +303,8 @@ RubberBandStretcher::Impl::processChunkForChannel(size_t c,
size_t shiftIncrement,
bool phaseReset)
{
Profiler profiler("RubberBandStretcher::Impl::processChunkForChannel");
// Process a single chunk on a single channel. This assumes
// enough input data is available; caller must have tested this
// using e.g. testInbufReadSpace first. Return true if this is
@@ -284,7 +366,9 @@ RubberBandStretcher::Impl::processChunkForChannel(size_t c,
}
if (m_threaded) {
size_t required = shiftIncrement;
int required = shiftIncrement;
if (m_pitchScale != 1.0) {
required = int(required / m_pitchScale) + 1;
}
@@ -313,6 +397,8 @@ RubberBandStretcher::Impl::calculateIncrements(size_t &phaseIncrementRtn,
size_t &shiftIncrementRtn,
bool &phaseReset)
{
Profiler profiler("RubberBandStretcher::Impl::calculateIncrements");
// cerr << "calculateIncrements" << endl;
// Calculate the next upcoming phase and shift increment, on the
@@ -342,6 +428,8 @@ RubberBandStretcher::Impl::calculateIncrements(size_t &phaseIncrementRtn,
}
}
const int hs = m_windowSize/2 + 1;
// Normally we would mix down the time-domain signal and apply a
// single FFT, or else mix down the Cartesian form of the
// frequency-domain signal. Both of those would be inefficient
@@ -352,22 +440,30 @@ RubberBandStretcher::Impl::calculateIncrements(size_t &phaseIncrementRtn,
// phases to cancel each other, and broadband effects will still
// be apparent.
for (size_t i = 0; i <= m_windowSize/2; ++i) {
cd.fltbuf[i] = 0.0;
}
float df = 0.f;
for (size_t c = 0; c < m_channels; ++c) {
for (size_t i = 0; i <= m_windowSize/2; ++i) {
cd.fltbuf[i] += m_channelData[c]->mag[i];
if (m_channels == 1) {
df = m_phaseResetAudioCurve->process(cd.mag, m_increment);
} else {
double *tmp = (double *)alloca(hs * sizeof(double));
for (int i = 0; i < hs; ++i) {
tmp[i] = 0.0;
}
for (size_t c = 0; c < m_channels; ++c) {
for (int i = 0; i < hs; ++i) {
tmp[i] += m_channelData[c]->mag[i];
}
}
}
float df = m_phaseResetAudioCurve->process(cd.fltbuf, m_increment);
df = m_phaseResetAudioCurve->process(tmp, m_increment);
}
int incr = m_stretchCalculator->calculateSingle
(getEffectiveRatio(),
m_inputDuration, //!!! no, totally wrong... fortunately it doesn't matter atm
df);
(getEffectiveRatio(), df, m_increment);
m_lastProcessPhaseResetDf.write(&df, 1);
m_lastProcessOutputIncrements.write(&incr, 1);
@@ -407,6 +503,8 @@ RubberBandStretcher::Impl::getIncrements(size_t channel,
size_t &shiftIncrementRtn,
bool &phaseReset)
{
Profiler profiler("RubberBandStretcher::Impl::getIncrements");
if (channel >= m_channels) {
phaseIncrementRtn = m_increment;
shiftIncrementRtn = m_increment;
@@ -478,39 +576,78 @@ RubberBandStretcher::Impl::getIncrements(size_t channel,
void
RubberBandStretcher::Impl::analyseChunk(size_t channel)
{
size_t i;
Profiler profiler("RubberBandStretcher::Impl::analyseChunk");
int i;
ChannelData &cd = *m_channelData[channel];
double *const R__ dblbuf = cd.dblbuf;
float *const R__ fltbuf = cd.fltbuf;
int sz = m_windowSize;
int hs = m_windowSize/2;
// cd.fltbuf is known to contain m_windowSize samples
m_window->cut(cd.fltbuf);
m_window->cut(fltbuf);
for (i = 0; i < m_windowSize/2; ++i) {
cd.dblbuf[i] = cd.fltbuf[i + m_windowSize/2];
cd.dblbuf[i + m_windowSize/2] = cd.fltbuf[i];
if (cd.oversample > 1) {
int bufsiz = sz * cd.oversample;
int offset = (bufsiz - sz) / 2;
// eek
for (i = 0; i < offset; ++i) {
dblbuf[i] = 0.0;
}
for (i = 0; i < offset; ++i) {
dblbuf[bufsiz - i - 1] = 0.0;
}
for (i = 0; i < sz; ++i) {
dblbuf[offset + i] = fltbuf[i];
}
for (i = 0; i < bufsiz / 2; ++i) {
double tmp = dblbuf[i];
dblbuf[i] = dblbuf[i + bufsiz/2];
dblbuf[i + bufsiz/2] = tmp;
}
} else {
for (i = 0; i < hs; ++i) {
dblbuf[i] = fltbuf[i + hs];
dblbuf[i + hs] = fltbuf[i];
}
}
cd.fft->forwardPolar(cd.dblbuf, cd.mag, cd.phase);
cd.fft->forwardPolar(dblbuf, cd.mag, cd.phase);
}
double mod(double x, double y) { return x - (y * floor(x / y)); }
double princarg(double a) { return mod(a + M_PI, -2 * M_PI) + M_PI; }
static inline double mod(double x, double y) { return x - (y * floor(x / y)); }
static inline double princarg(double a) { return mod(a + M_PI, -2.0 * M_PI) + M_PI; }
void
RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
bool phaseReset)
{
Profiler profiler("RubberBandStretcher::Impl::modifyChunk");
ChannelData &cd = *m_channelData[channel];
if (phaseReset && m_debugLevel > 1) {
cerr << "phase reset: leaving phases unmodified" << endl;
}
size_t count = m_windowSize/2;
size_t pfp = 0;
int pfp = 0;
double rate = m_stretcher->m_sampleRate;
int sz = m_windowSize;
int count = (sz * cd.oversample) / 2;
bool unchanged = cd.unchanged && (outputIncrement == m_increment);
bool fullReset = phaseReset;
if (!(m_options & OptionPhaseIndependent)) {
cd.freqPeak[0] = 0;
@@ -539,11 +676,11 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
}
}
size_t limit0 = lrint((freq0 * m_windowSize) / rate);
size_t limit1 = lrint((freq1 * m_windowSize) / rate);
size_t limit2 = lrint((freq2 * m_windowSize) / rate);
int limit0 = lrint((freq0 * sz * cd.oversample) / rate);
int limit1 = lrint((freq1 * sz * cd.oversample) / rate);
int limit2 = lrint((freq2 * sz * cd.oversample) / rate);
size_t range = 0;
int range = 0;
if (limit1 < limit0) limit1 = limit0;
if (limit2 < limit1) limit2 = limit1;
@@ -552,12 +689,12 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
int peakCount = 0;
for (size_t i = 0; i <= count; ++i) {
for (int i = 0; i <= count; ++i) {
double mag = cd.mag[i];
bool isPeak = true;
for (size_t j = 1; j <= range; ++j) {
for (int j = 1; j <= range; ++j) {
if (mag < cd.mag[i-j]) {
isPeak = false;
@@ -578,13 +715,13 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
// from pfp to half-way between pfp and i point at pfp, and
// those from the half-way mark to i point at i.
size_t halfway = (pfp + i) / 2;
int halfway = (pfp + i) / 2;
if (halfway == pfp) halfway = pfp + 1;
for (size_t j = pfp + 1; j < halfway; ++j) {
for (int j = pfp + 1; j < halfway; ++j) {
cd.freqPeak[j] = pfp;
}
for (size_t j = halfway; j <= i; ++j) {
for (int j = halfway; j <= i; ++j) {
cd.freqPeak[j] = i;
}
@@ -607,27 +744,33 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
cd.freqPeak[count] = count;
}
Profiler profiler2("RubberBandStretcher::Impl::modifyChunk part 2");
double peakInPhase = 0.0;
double peakOutPhase = 0.0;
int p = -1, pp = -1;
for (size_t i = 0; i <= count; ++i) {
for (int i = 0; i <= count; ++i) {
if (m_options & OptionPhaseIndependent) {
p = i;
pp = int(i)-1;
pp = i-1;
} else {
p = cd.freqPeak[i];
if (i > 0) pp = int(cd.freqPeak[i-1]);
if (i > 0) pp = cd.freqPeak[i-1];
}
bool resetThis = phaseReset;
if (m_options & OptionTransientsMixed) {
size_t low = lrint((150 * m_windowSize) / rate);
size_t high = lrint((1000 * m_windowSize) / rate);
int low = lrint((150 * sz * cd.oversample) / rate);
int high = lrint((1000 * sz * cd.oversample) / rate);
if (resetThis) {
if (i > low && i < high) resetThis = false;
if (i > low && i < high) {
resetThis = false;
fullReset = false;
}
}
}
@@ -635,7 +778,9 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
if (i == 0 || p != pp) {
double omega = (2 * M_PI * m_increment * p) / m_windowSize;
double omega = (2 * M_PI * m_increment * p) /
(sz * cd.oversample);
double expectedPhase = cd.prevPhase[p] + omega;
double phaseError = princarg(cd.phase[p] - expectedPhase);
double phaseIncrement = (omega + phaseError) / m_increment;
@@ -649,10 +794,13 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
peakInPhase = cd.prevPhase[p];
peakOutPhase = unwrappedPhase;
}
if (i != p) {
// cd.phase[i] = atan2(cd.imag[i], cd.real[i]);//!!!
double diffToPeak = peakInPhase - cd.phase[i];
double unwrappedPhase = peakOutPhase - diffToPeak;
@@ -662,57 +810,186 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
}
} else {
// resetThis == true
cd.prevPhase[i] = cd.phase[i];
cd.unwrappedPhase[i] = cd.phase[i];
}
}
if (fullReset) unchanged = true;
cd.unchanged = unchanged;
if (unchanged && m_debugLevel > 1) {
cerr << "frame unchanged on channel " << channel << endl;
}
}
void
RubberBandStretcher::Impl::formantShiftChunk(size_t channel)
{
Profiler profiler("RubberBandStretcher::Impl::formantShiftChunk");
ChannelData &cd = *m_channelData[channel];
double *const R__ mag = cd.mag;
double *const R__ envelope = cd.envelope;
double *const R__ dblbuf = cd.dblbuf;
const int sz = m_windowSize;
const int hs = m_windowSize/2;
cd.fft->inverseCepstral(mag, dblbuf);
double denom = sz;
for (int i = 0; i < sz; ++i) {
dblbuf[i] /= denom;
}
//!!! calculate this value -- the divisor should be the highest fundamental frequency we expect to find, plus a bit
const int cutoff = m_stretcher->m_sampleRate / 700;
dblbuf[0] /= 2;
dblbuf[cutoff-1] /= 2;
for (int i = cutoff; i < sz; ++i) {
dblbuf[i] = 0.0;
}
cd.fft->forward(dblbuf, envelope, 0);
for (int i = 0; i <= hs; ++i) {
envelope[i] = exp(envelope[i]);
}
for (int i = 0; i <= hs; ++i) {
mag[i] /= envelope[i];
}
if (m_pitchScale > 1.0) {
// scaling up, we want a new envelope that is lower by the pitch factor
for (int target = 0; target <= hs; ++target) {
int source = lrint(target * m_pitchScale);
if (source > m_windowSize) {
envelope[target] = 0.0;
} else {
envelope[target] = envelope[source];
}
}
} else {
// scaling down, we want a new envelope that is higher by the pitch factor
for (int target = hs; target > 0; ) {
--target;
int source = lrint(target * m_pitchScale);
envelope[target] = envelope[source];
}
}
for (int i = 0; i <= hs; ++i) {
mag[i] *= envelope[i];
}
cd.unchanged = false;
}
void
RubberBandStretcher::Impl::synthesiseChunk(size_t channel)
{
Profiler profiler("RubberBandStretcher::Impl::synthesiseChunk");
if ((m_options & OptionFormantPreserved) &&
(m_pitchScale != 1.0)) {
formantShiftChunk(channel);
}
ChannelData &cd = *m_channelData[channel];
cd.fft->inversePolar(cd.mag, cd.phase, cd.dblbuf);
double *const R__ dblbuf = cd.dblbuf;
float *const R__ fltbuf = cd.fltbuf;
float *const R__ accumulator = cd.accumulator;
float *const R__ windowAccumulator = cd.windowAccumulator;
int sz = m_windowSize;
int hs = m_windowSize/2;
int i;
for (size_t i = 0; i < m_windowSize/2; ++i) {
cd.fltbuf[i] = cd.dblbuf[i + m_windowSize/2];
cd.fltbuf[i + m_windowSize/2] = cd.dblbuf[i];
if (!cd.unchanged) {
cd.fft->inversePolar(cd.mag, cd.phase, cd.dblbuf);
if (cd.oversample > 1) {
int bufsiz = sz * cd.oversample;
int hbs = hs * cd.oversample;
int offset = (bufsiz - sz) / 2;
for (i = 0; i < hbs; ++i) {
double tmp = dblbuf[i];
dblbuf[i] = dblbuf[i + hbs];
dblbuf[i + hbs] = tmp;
}
for (i = 0; i < sz; ++i) {
fltbuf[i] = float(dblbuf[i + offset]);
}
} else {
for (i = 0; i < hs; ++i) {
fltbuf[i] = float(dblbuf[i + hs]);
}
for (i = 0; i < hs; ++i) {
fltbuf[i + hs] = float(dblbuf[i]);
}
}
float denom = float(sz * cd.oversample);
// our ffts produced unscaled results
for (i = 0; i < sz; ++i) {
fltbuf[i] = fltbuf[i] / float(sz * cd.oversample);
}
// } else {
// cerr << "unchanged on channel " << channel << endl;
}
// our ffts produced unscaled results
for (size_t i = 0; i < m_windowSize; ++i) {
cd.fltbuf[i] = cd.fltbuf[i] / m_windowSize;
}
m_window->cut(fltbuf);
m_window->cut(cd.fltbuf);
for (size_t i = 0; i < m_windowSize; ++i) {
cd.accumulator[i] += cd.fltbuf[i];
for (i = 0; i < sz; ++i) {
accumulator[i] += fltbuf[i];
}
cd.accumulatorFill = m_windowSize;
float fixed = m_window->getArea() * 1.5;
float fixed = m_window->getArea() * 1.5f;
for (size_t i = 0; i < m_windowSize; ++i) {
for (i = 0; i < sz; ++i) {
float val = m_window->getValue(i);
cd.windowAccumulator[i] += val * fixed;
windowAccumulator[i] += val * fixed;
}
}
void
RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, bool last)
{
ChannelData &cd = *m_channelData[channel];
Profiler profiler("RubberBandStretcher::Impl::writeChunk");
ChannelData &cd = *m_channelData[channel];
float *const R__ accumulator = cd.accumulator;
float *const R__ windowAccumulator = cd.windowAccumulator;
const int sz = m_windowSize;
const int si = shiftIncrement;
int i;
if (m_debugLevel > 2) {
cerr << "writeChunk(" << channel << ", " << shiftIncrement << ", " << last << ")" << endl;
}
for (int i = 0; i < shiftIncrement; ++i) {
if (cd.windowAccumulator[i] > 0.f) {
cd.accumulator[i] /= cd.windowAccumulator[i];
for (i = 0; i < si; ++i) {
if (windowAccumulator[i] > 0.f) {
accumulator[i] /= windowAccumulator[i];
}
}
@@ -723,9 +1000,11 @@ RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, boo
theoreticalOut = lrint(cd.inputSize * m_timeRatio);
}
if (m_pitchScale != 1.0 && cd.resampler) {
bool resampledAlready = resampleBeforeStretching();
size_t reqSize = int(ceil(shiftIncrement / m_pitchScale));
if (!resampledAlready && m_pitchScale != 1.0 && cd.resampler) {
size_t reqSize = int(ceil(si / m_pitchScale));
if (reqSize > cd.resamplebufSize) {
// This shouldn't normally happen -- the buffer is
// supposed to be initialised with enough space in the
@@ -734,15 +1013,13 @@ RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, boo
// calculator has gone mad, or something.
cerr << "WARNING: RubberBandStretcher::Impl::writeChunk: resizing resampler buffer from "
<< cd.resamplebufSize << " to " << reqSize << endl;
cd.resamplebufSize = reqSize;
if (cd.resamplebuf) delete[] cd.resamplebuf;
cd.resamplebuf = new float[cd.resamplebufSize];
cd.setResampleBufSize(reqSize);
}
size_t outframes = cd.resampler->resample(&cd.accumulator,
&cd.resamplebuf,
shiftIncrement,
si,
1.0 / m_pitchScale,
last);
@@ -751,28 +1028,28 @@ RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, boo
outframes, cd.outCount, theoreticalOut);
} else {
writeOutput(*cd.outbuf, cd.accumulator,
shiftIncrement, cd.outCount, theoreticalOut);
writeOutput(*cd.outbuf, accumulator,
si, cd.outCount, theoreticalOut);
}
for (size_t i = 0; i < m_windowSize - shiftIncrement; ++i) {
cd.accumulator[i] = cd.accumulator[i + shiftIncrement];
for (i = 0; i < sz - si; ++i) {
accumulator[i] = accumulator[i + si];
}
for (size_t i = m_windowSize - shiftIncrement; i < m_windowSize; ++i) {
cd.accumulator[i] = 0.0f;
for (i = sz - si; i < sz; ++i) {
accumulator[i] = 0.0f;
}
for (size_t i = 0; i < m_windowSize - shiftIncrement; ++i) {
cd.windowAccumulator[i] = cd.windowAccumulator[i + shiftIncrement];
for (i = 0; i < sz - si; ++i) {
windowAccumulator[i] = windowAccumulator[i + si];
}
for (size_t i = m_windowSize - shiftIncrement; i < m_windowSize; ++i) {
cd.windowAccumulator[i] = 0.0f;
for (i = sz - si; i < sz; ++i) {
windowAccumulator[i] = 0.0f;
}
if (cd.accumulatorFill > shiftIncrement) {
cd.accumulatorFill -= shiftIncrement;
if (cd.accumulatorFill > si) {
cd.accumulatorFill -= si;
} else {
cd.accumulatorFill = 0;
if (cd.draining) {
@@ -787,6 +1064,8 @@ RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, boo
void
RubberBandStretcher::Impl::writeOutput(RingBuffer<float> &to, float *from, size_t qty, size_t &outCount, size_t theoreticalOut)
{
Profiler profiler("RubberBandStretcher::Impl::writeOutput");
// In non-RT mode, we don't want to write the first startSkip
// samples, because the first chunk is centred on the start of the
// output. In RT mode we didn't apply any pre-padding in
@@ -859,6 +1138,8 @@ RubberBandStretcher::Impl::writeOutput(RingBuffer<float> &to, float *from, size_
int
RubberBandStretcher::Impl::available() const
{
Profiler profiler("RubberBandStretcher::Impl::available");
if (m_threaded) {
MutexLocker locker(&m_threadSetMutex);
if (m_channelData.empty()) return 0;
@@ -906,6 +1187,8 @@ RubberBandStretcher::Impl::available() const
size_t
RubberBandStretcher::Impl::retrieve(float *const *output, size_t samples) const
{
Profiler profiler("RubberBandStretcher::Impl::retrieve");
size_t got = samples;
for (size_t c = 0; c < m_channels; ++c) {