* More work on framing, interpolation and scaling for longer window than FFT size.

This adds the --smoothing option to the command line tool and SmoothingOn/Off
  options to the API, introducing a double-length window with presum FFT and
  time-domain smoothing.  Behaviour elsewhere _should_ be unchanged.
This commit is contained in:
Chris Cannam
2010-05-29 22:07:54 +01:00
parent 49cf25d724
commit 99ba629361
10 changed files with 190 additions and 128 deletions

View File

@@ -83,6 +83,7 @@ int main(int argc, char **argv)
bool lamination = true;
bool longwin = false;
bool shortwin = false;
bool smoothing = false;
bool hqpitch = false;
bool formant = false;
bool crispchanged = false;
@@ -132,6 +133,7 @@ int main(int argc, char **argv)
{ "bl-transients", 0, 0, '8' },
{ "detector-perc", 0, 0, '5' },
{ "detector-soft", 0, 0, '6' },
{ "smoothing", 0, 0, '9' },
{ "pitch-hq", 0, 0, '%' },
{ "threads", 0, 0, '@' },
{ "quiet", 0, 0, 'q' },
@@ -165,6 +167,7 @@ int main(int argc, char **argv)
case '5': detector = PercussiveDetector; crispchanged = true; break;
case '6': detector = SoftDetector; crispchanged = true; break;
case '8': transients = BandLimitedTransients; crispchanged = true; break;
case '9': smoothing = true; crispchanged = true; break;
case '%': hqpitch = true; break;
case 'c': crispness = atoi(optarg); break;
case 'q': quiet = true; break;
@@ -223,6 +226,7 @@ int main(int argc, char **argv)
cerr << " --no-lamination Disable phase lamination" << endl;
cerr << " --window-long Use longer processing window (actual size may vary)" << endl;
cerr << " --window-short Use shorter processing window" << endl;
cerr << " --smoothing Apply window presum and time-domain smoothing" << endl;
cerr << " --detector-perc Use percussive transient detector (as in pre-1.5)" << endl;
cerr << " --detector-soft Use soft transient detector" << endl;
cerr << " --pitch-hq In RT mode, use a slower, higher quality pitch shift" << endl;
@@ -362,6 +366,7 @@ int main(int argc, char **argv)
if (!lamination) options |= RubberBandStretcher::OptionPhaseIndependent;
if (longwin) options |= RubberBandStretcher::OptionWindowLong;
if (shortwin) options |= RubberBandStretcher::OptionWindowShort;
if (smoothing) options |= RubberBandStretcher::OptionSmoothingOn;
if (formant) options |= RubberBandStretcher::OptionFormantPreserved;
if (hqpitch) options |= RubberBandStretcher::OptionPitchHighQuality;

View File

@@ -191,7 +191,18 @@ public:
* likely to result in a smoother sound at the expense of
* clarity and timing.
*
* 8. Flags prefixed \c OptionFormant control the handling of
* 8. Flags prefixed \c OptionSmoothing control the use of
* window-presum FFT and time-domain smoothing. These options may
* not be changed after construction.
*
* \li \c OptionSmoothingOff - Do not use time-domain smoothing.
*
* \li \c OptionSmoothingOn - Use time-domain smoothing.
* This will result in a softer sound, but it may be
* appropriate for longer stretches of some instruments and
* can mix well with OptionWindowShort.
*
* 9. Flags prefixed \c OptionFormant control the handling of
* formant shape (spectral envelope) when pitch-shifting. These
* options may be changed at any time.
*
@@ -204,7 +215,7 @@ public:
* note frequency without so substantially affecting the
* perceived pitch profile of the voice or instrument.
*
* 9. Flags prefixed \c OptionPitch control the method used for
* 10. Flags prefixed \c OptionPitch control the method used for
* pitch shifting. These options may be changed at any time.
* They are only effective in realtime mode; in offline mode, the
* pitch-shift method is fixed.
@@ -253,6 +264,9 @@ public:
OptionWindowShort = 0x00100000,
OptionWindowLong = 0x00200000,
OptionSmoothingOff = 0x00000000,
OptionSmoothingOn = 0x00800000,
OptionFormantShifted = 0x00000000,
OptionFormantPreserved = 0x01000000,

View File

@@ -63,6 +63,9 @@ enum RubberBandOption {
RubberBandOptionWindowShort = 0x00100000,
RubberBandOptionWindowLong = 0x00200000,
RubberBandOptionSmoothingOff = 0x00000000,
RubberBandOptionSmoothingOn = 0x00800000,
RubberBandOptionFormantShifted = 0x00000000,
RubberBandOptionFormantPreserved = 0x01000000,

View File

@@ -76,6 +76,8 @@ RubberBandStretcher::Impl::ChannelData::construct(const std::set<size_t> &sizes,
accumulator = allocate<float>(maxSize);
windowAccumulator = allocate<float>(maxSize);
interpolator = allocate<float>(maxSize);
interpolatorScale = 0;
for (std::set<size_t>::const_iterator i = sizes.begin();
i != sizes.end(); ++i) {
@@ -195,6 +197,8 @@ RubberBandStretcher::Impl::ChannelData::setSizes(size_t windowSize,
deallocate(windowAccumulator);
windowAccumulator = newAcc;
interpolatorScale = 0;
//!!! and resampler?
for (size_t i = 0; i < realSize; ++i) {
@@ -294,6 +298,7 @@ RubberBandStretcher::Impl::ChannelData::reset()
inCount = 0;
inputSize = -1;
outCount = 0;
interpolatorScale = 0;
unchanged = true;
draining = false;
outputComplete = false;

View File

@@ -104,6 +104,8 @@ public:
float *accumulator;
size_t accumulatorFill;
float *windowAccumulator;
float *interpolator; // only used when time-domain smoothing is on
int interpolatorScale;
float *fltbuf;
double *dblbuf; // owned by FFT object, only used for time domain FFT i/o

View File

@@ -81,7 +81,7 @@ RubberBandStretcher::Impl::Impl(size_t sampleRate,
m_debugLevel(m_defaultDebugLevel),
m_mode(JustCreated),
m_awindow(0),
m_asinc(0),
m_afilter(0),
m_swindow(0),
m_studyFFT(0),
m_spaceAvailable("space"),
@@ -496,8 +496,15 @@ RubberBandStretcher::Impl::calculateSizes()
// 4 * m_baseFftSize unless ratio is less than 1/1024.
m_fftSize = windowSize;
m_aWindowSize = windowSize;
m_sWindowSize = windowSize;
if (m_options & OptionSmoothingOn) {
m_aWindowSize = windowSize * 2;
m_sWindowSize = windowSize * 2;
} else {
m_aWindowSize = windowSize;
m_sWindowSize = windowSize;
}
m_increment = inputIncrement;
// When squashing, the greatest theoretically possible output
@@ -596,7 +603,7 @@ RubberBandStretcher::Impl::configure()
}
}
m_awindow = m_windows[m_aWindowSize];
m_asinc = m_sincs[m_aWindowSize];
m_afilter = m_sincs[m_aWindowSize];
m_swindow = m_windows[m_sWindowSize];
if (m_debugLevel > 0) {
@@ -750,7 +757,7 @@ RubberBandStretcher::Impl::reconfigure()
}
m_awindow = m_windows[m_aWindowSize];
m_asinc = m_sincs[m_aWindowSize];
m_afilter = m_sincs[m_aWindowSize];
m_swindow = m_windows[m_sWindowSize];
for (size_t c = 0; c < m_channels; ++c) {
@@ -960,7 +967,7 @@ RubberBandStretcher::Impl::study(const float *const *input, size_t samples, bool
// greater than the fft size so we are doing a
// time-aliased presum fft) or zero-pad, then we might
// as well use our standard function for it. This
// means we retain the m_asinc cut if folding as well,
// means we retain the m_afilter cut if folding as well,
// which is good for consistency with real-time mode.
// We get fftshift as well, which we don't want, but
// the penalty is nominal.
@@ -971,7 +978,7 @@ RubberBandStretcher::Impl::study(const float *const *input, size_t samples, bool
(std::max(m_fftSize, m_aWindowSize) * sizeof(float));
if (m_aWindowSize > m_fftSize) {
m_asinc->cut(cd.accumulator);
m_afilter->cut(cd.accumulator);
}
cutShiftAndFold(tmp, m_fftSize, cd.accumulator, m_awindow);

View File

@@ -108,7 +108,7 @@ protected:
void analyseChunk(size_t channel);
void modifyChunk(size_t channel, size_t outputIncrement, bool phaseReset);
void formantShiftChunk(size_t channel);
void synthesiseChunk(size_t channel);
void synthesiseChunk(size_t channel, size_t shiftIncrement);
void writeChunk(size_t channel, size_t shiftIncrement, bool last);
void calculateSizes();
@@ -173,7 +173,7 @@ protected:
std::map<size_t, Window<float> *> m_windows;
std::map<size_t, SincWindow<float> *> m_sincs;
Window<float> *m_awindow;
SincWindow<float> *m_asinc;
SincWindow<float> *m_afilter;
Window<float> *m_swindow;
FFT *m_studyFFT;

View File

@@ -377,7 +377,7 @@ RubberBandStretcher::Impl::processChunkForChannel(size_t c,
// then skip m_increment to advance the read pointer.
modifyChunk(c, phaseIncrement, phaseReset);
synthesiseChunk(c); // reads from cd.mag, cd.phase
synthesiseChunk(c, shiftIncrement); // reads from cd.mag, cd.phase
if (m_debugLevel > 2) {
if (phaseReset) {
@@ -647,7 +647,7 @@ RubberBandStretcher::Impl::analyseChunk(size_t channel)
// cd.fltbuf is known to contain m_aWindowSize samples
if (m_aWindowSize > m_fftSize) {
m_asinc->cut(fltbuf);
m_afilter->cut(fltbuf);
}
cutShiftAndFold(dblbuf, m_fftSize, fltbuf, m_awindow);
@@ -859,7 +859,8 @@ RubberBandStretcher::Impl::formantShiftChunk(size_t channel)
}
void
RubberBandStretcher::Impl::synthesiseChunk(size_t channel)
RubberBandStretcher::Impl::synthesiseChunk(size_t channel,
size_t shiftIncrement)
{
Profiler profiler("RubberBandStretcher::Impl::synthesiseChunk");
@@ -876,8 +877,11 @@ RubberBandStretcher::Impl::synthesiseChunk(size_t channel)
float *const R__ accumulator = cd.accumulator;
float *const R__ windowAccumulator = cd.windowAccumulator;
int sz = m_fftSize;
int hs = sz / 2;
const int fsz = m_fftSize;
const int hs = fsz / 2;
const int wsz = m_sWindowSize;
int i;
if (!cd.unchanged) {
@@ -885,73 +889,40 @@ RubberBandStretcher::Impl::synthesiseChunk(size_t channel)
cd.fft->inversePolar(cd.mag, cd.phase, cd.dblbuf);
// our ffts produced unscaled results
float factor = 1.0 / m_fftSize;
v_scale(dblbuf, factor, sz);
float factor = 1.f / fsz;
v_scale(dblbuf, factor, fsz);
if (m_sWindowSize == m_fftSize) {
if (wsz == fsz) {
v_convert(fltbuf, dblbuf + hs, hs);
v_convert(fltbuf + hs, dblbuf, hs);
} else {
v_zero(fltbuf, m_sWindowSize);
#ifdef PARP
//!!! centre fft frame (effectively only if swindowsize == fftsize, else discontinuities)
const int count = std::min(m_fftSize, m_sWindowSize);
int i = m_sWindowSize/2 - count/2;
int j = m_fftSize - count/2;
for (int k = 0; k < count; ++k) {
v_zero(fltbuf, wsz);
int j = fsz - wsz/2;
while (j < 0) j += fsz;
for (int i = 0; i < wsz; ++i) {
fltbuf[i] += dblbuf[j];
if (i++ == m_sWindowSize) i = 0;
if (j++ == m_fftSize) j = 0;
if (++j == fsz) j = 0;
}
#else
//!!! unwrap
int j = sz - m_sWindowSize/2;
while (j < 0) j += sz;
for (int i = 0; i < m_sWindowSize; ++i) {
fltbuf[i] += dblbuf[j];
if (++j == sz) j = 0;
}
#endif
}
}
if (wsz > fsz) {
float *tmp = (float *)alloca(wsz * sizeof(float));
int p = shiftIncrement * 2;
if (cd.interpolatorScale != p) {
SincWindow<float>::write(cd.interpolator, wsz, p);
}
v_multiply(fltbuf, cd.interpolator, wsz);
v_copy(tmp, cd.interpolator, wsz);
m_swindow->cut(tmp);
v_add(windowAccumulator, tmp, wsz);
} else {
m_swindow->add(windowAccumulator, m_awindow->getArea() * 1.5f);
}
m_swindow->cut(fltbuf);
v_add(accumulator, fltbuf, m_sWindowSize);
cd.accumulatorFill = m_sWindowSize;
/*
std::cerr << "Note: synthesis window area = " << m_swindow->getArea()
<< ", analysis window area = " << m_awindow->getArea()
<< ", analysis sinc area = " << m_asinc->getArea()
<< std::endl;
*/
//!!!
float *tmp = (float *)alloca(m_sWindowSize * sizeof(float));
v_set(tmp, 1.f, m_sWindowSize);
//!!! m_awindow->cut(tmp); //!!! if (m_aWindowSize == m_sWindowSize)
m_swindow->cut(tmp);
/*
if (m_aWindowSize != m_fftSize) {
if (m_aWindowSize == m_sWindowSize) {
m_asinc->cut(tmp);
} else {
int sourceIndex = m_aWindowSize/2 - m_sWindowSize/2;
for (int i = 0; i < m_sWindowSize; ++i) {
if (sourceIndex >= 0 && sourceIndex < m_aWindowSize) {
tmp[i] *= m_asinc->getValue(sourceIndex);
}
++sourceIndex;
}
}
}
*/
v_add(windowAccumulator, tmp, m_sWindowSize);
v_add(accumulator, fltbuf, wsz);
cd.accumulatorFill = wsz;
}
void

View File

@@ -31,25 +31,44 @@ class SincWindow
{
public:
/**
* Construct a sinc windower which produces a window of the given
* size containing the values of sinc(x) with x=0 at the centre,
* such that the distance from -pi to pi (the point at which the
* sinc function first crosses zero, for negative and positive
* Construct a sinc windower which produces a window of size n
* containing the values of sinc(x) with x=0 at index n/2, such
* that the distance from -pi to pi (the point at which the sinc
* function first crosses zero, for negative and positive
* arguments respectively) is p samples.
*/
SincWindow(int size, int p) : m_size(size), m_p(p) { encache(); }
SincWindow(const SincWindow &w) : m_size(w.m_size), m_p(w.m_p) { encache(); }
SincWindow(int n, int p) : m_size(n), m_p(p), m_cache(0) {
encache();
}
SincWindow(const SincWindow &w) : m_size(w.m_size), m_p(w.m_p), m_cache(0) {
encache();
}
SincWindow &operator=(const SincWindow &w) {
if (&w == this) return *this;
m_size = w.m_size;
m_p = w.m_p;
m_cache = 0;
encache();
return *this;
}
virtual ~SincWindow() { delete[] m_cache; }
virtual ~SincWindow() {
deallocate(m_cache);
}
inline void cut(T *const R__ block) const {
v_multiply(block, m_cache, m_size);
/**
* Regenerate the sinc window with the same size, but a new scale
* (the p value is interpreted as for the argument of the same
* name to the constructor). If p is unchanged from the previous
* value, do nothing (quickly).
*/
inline void rewrite(int p) {
if (m_p == p) return;
m_p = p;
encache();
}
inline void cut(T *const R__ dst) const {
v_multiply(dst, m_cache, m_size);
}
inline void cut(const T *const R__ src, T *const R__ dst) const {
@@ -66,42 +85,62 @@ public:
inline int getSize() const { return m_size; }
inline int getP() const { return m_p; }
/**
* Write a sinc window of size n with scale p (the p value is
* interpreted as for the argument of the same name to the
* constructor).
*/
static
void write(T *const R__ dst, const int n, const int p) {
const int half = n/2;
writeHalf(dst, n, p);
int target = half - 1;
for (int i = half + 1; i < n; ++i) {
dst[target--] = dst[i];
}
const T twopi = 2. * M_PI;
T arg = T(half) * twopi / p;
dst[0] = sin(arg) / arg;
}
protected:
int m_size;
int m_p;
T *R__ m_cache;
T m_area;
void encache();
/**
* Write the positive half (i.e. n/2 to n-1) of a sinc window of
* size n with scale p (the p value is interpreted as for the
* argument of the same name to the constructor). The negative
* half (indices 0 to n/2-1) of dst is left unchanged.
*/
static
void writeHalf(T *const R__ dst, const int n, const int p) {
const int half = n/2;
const T twopi = 2. * M_PI;
dst[half] = T(1.0);
for (int i = 1; i < half; ++i) {
T arg = T(i) * twopi / p;
dst[half+i] = sin(arg) / arg;
}
}
void encache() {
if (!m_cache) {
m_cache = allocate<T>(m_size);
}
write(m_cache, m_size, m_p);
m_area = 0;
for (int i = 0; i < m_size; ++i) {
m_area += m_cache[i];
}
m_area /= m_size;
}
};
template <typename T>
void SincWindow<T>::encache()
{
const int n = m_size;
T *mult = allocate<T>(n);
v_set(mult, T(1.0), n);
int i;
for (i = 0; i < n; ++i) {
T extent = T(n)/2.;
T arg = (T(i) - extent) * (2. * M_PI) / m_p;
if (arg != 0.) {
mult[i] *= sin(arg) / arg;
}
}
m_cache = mult;
m_area = 0;
for (i = 0; i < n; ++i) {
std::cout << i << ":" << m_cache[i] << " ";
m_area += m_cache[i];
}
std::cout << std::endl;
m_area /= n;
}
}
#endif

View File

@@ -45,16 +45,23 @@ public:
/**
* Construct a windower of the given type.
*/
Window(WindowType type, int size) : m_type(type), m_size(size) { encache(); }
Window(const Window &w) : m_type(w.m_type), m_size(w.m_size) { encache(); }
Window(WindowType type, int size) : m_type(type), m_size(size), m_cache(0) {
encache();
}
Window(const Window &w) : m_type(w.m_type), m_size(w.m_size), m_cache(0) {
encache();
}
Window &operator=(const Window &w) {
if (&w == this) return *this;
m_type = w.m_type;
m_size = w.m_size;
m_cache = 0;
encache();
return *this;
}
virtual ~Window() { delete[] m_cache; }
virtual ~Window() {
deallocate(m_cache);
}
inline void cut(T *const R__ block) const {
v_multiply(block, m_cache, m_size);
@@ -68,6 +75,16 @@ public:
v_add_with_gain(dst, m_cache, m_size, scale);
}
inline T getRMS() const {
T total = 0;
for (int i = 0; i < m_size; ++i) {
total += m_cache[i] * m_cache[i];
}
T rms = sqrt(total / m_size);
std::cerr << "rms = " << rms << std::endl;
return rms;
}
inline T getArea() const { return m_area; }
inline T getValue(int i) const { return m_cache[i]; }
@@ -87,41 +104,42 @@ protected:
template <typename T>
void Window<T>::encache()
{
if (!m_cache) m_cache = allocate<T>(m_size);
const int n = m_size;
T *mult = allocate<T>(n);
v_set(mult, T(1.0), n);
v_set(m_cache, T(1.0), n);
int i;
switch (m_type) {
case RectangularWindow:
for (i = 0; i < n; ++i) {
mult[i] *= 0.5;
m_cache[i] *= 0.5;
}
break;
case BartlettWindow:
for (i = 0; i < n/2; ++i) {
mult[i] *= (i / T(n/2));
mult[i + n/2] *= (1.0 - (i / T(n/2)));
m_cache[i] *= (i / T(n/2));
m_cache[i + n/2] *= (1.0 - (i / T(n/2)));
}
break;
case HammingWindow:
cosinewin(mult, 0.54, 0.46, 0.0, 0.0);
cosinewin(m_cache, 0.54, 0.46, 0.0, 0.0);
break;
case HanningWindow:
cosinewin(mult, 0.50, 0.50, 0.0, 0.0);
cosinewin(m_cache, 0.50, 0.50, 0.0, 0.0);
break;
case BlackmanWindow:
cosinewin(mult, 0.42, 0.50, 0.08, 0.0);
cosinewin(m_cache, 0.42, 0.50, 0.08, 0.0);
break;
case GaussianWindow:
for (i = 0; i < n; ++i) {
mult[i] *= pow(2, - pow((i - (n-1)/2.0) / ((n-1)/2.0 / 3), 2));
m_cache[i] *= pow(2, - pow((i - (n-1)/2.0) / ((n-1)/2.0 / 3), 2));
}
break;
@@ -130,29 +148,27 @@ void Window<T>::encache()
int N = n-1;
for (i = 0; i < N/4; ++i) {
T m = 2 * pow(1.0 - (T(N)/2 - i) / (T(N)/2), 3);
mult[i] *= m;
mult[N-i] *= m;
m_cache[i] *= m;
m_cache[N-i] *= m;
}
for (i = N/4; i <= N/2; ++i) {
int wn = i - N/2;
T m = 1.0 - 6 * pow(wn / (T(N)/2), 2) * (1.0 - abs(wn) / (T(N)/2));
mult[i] *= m;
mult[N-i] *= m;
m_cache[i] *= m;
m_cache[N-i] *= m;
}
break;
}
case NuttallWindow:
cosinewin(mult, 0.3635819, 0.4891775, 0.1365995, 0.0106411);
cosinewin(m_cache, 0.3635819, 0.4891775, 0.1365995, 0.0106411);
break;
case BlackmanHarrisWindow:
cosinewin(mult, 0.35875, 0.48829, 0.14128, 0.01168);
cosinewin(m_cache, 0.35875, 0.48829, 0.14128, 0.01168);
break;
}
m_cache = mult;
m_area = 0;
for (i = 0; i < n; ++i) {
m_area += m_cache[i];