* Improvements to offline phase reset point detection

* Better support for higher sample rates
* Save and restore FFTW wisdom (needs work still)
* More tidying, options and argument overhauls
This commit is contained in:
Chris Cannam
2007-11-20 20:17:13 +00:00
parent e9cb6dbc37
commit 7c4fcd85da
11 changed files with 408 additions and 322 deletions

View File

@@ -34,12 +34,13 @@ public:
static const int OptionStretchElastic = 0x00000000; static const int OptionStretchElastic = 0x00000000;
static const int OptionStretchPrecise = 0x00000010; static const int OptionStretchPrecise = 0x00000010;
static const int OptionTransientsMixed = 0x00000000; static const int OptionTransientsCrisp = 0x00000000;
static const int OptionTransientsSmooth = 0x00000100; static const int OptionTransientsMixed = 0x00000100;
static const int OptionTransientsCrisp = 0x00000200; static const int OptionTransientsSmooth = 0x00000200;
static const int OptionPhasePeakLocked = 0x00000000; static const int OptionPhaseAdaptive = 0x00000000;
static const int OptionPhaseIndependent = 0x00001000; static const int OptionPhasePeakLocked = 0x00001000;
static const int OptionPhaseIndependent = 0x00002000;
static const int OptionThreadingAuto = 0x00000000; static const int OptionThreadingAuto = 0x00000000;
static const int OptionThreadingNone = 0x00010000; static const int OptionThreadingNone = 0x00010000;
@@ -98,6 +99,8 @@ public:
virtual void setDebugLevel(int level); virtual void setDebugLevel(int level);
static void setDefaultDebugLevel(int level);
protected: protected:
class Impl; class Impl;
Impl *m_d; Impl *m_d;

View File

@@ -20,6 +20,7 @@
#include <cmath> #include <cmath>
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <cstdio>
#include <vector> #include <vector>
class FFTImpl class FFTImpl
@@ -56,12 +57,17 @@ public:
~D_FFTW() { ~D_FFTW() {
if (m_fplanf) { if (m_fplanf) {
//!!! shouldn't do this every time, but only when the last one
// is destroyed (likewise shouldn't load every time) -- want
// a static refcount + mutex
saveWisdom('f');
fftwf_destroy_plan(m_fplanf); fftwf_destroy_plan(m_fplanf);
fftwf_destroy_plan(m_fplani); fftwf_destroy_plan(m_fplani);
fftwf_free(m_fbuf); fftwf_free(m_fbuf);
fftwf_free(m_fpacked); fftwf_free(m_fpacked);
} }
if (m_dplanf) { if (m_dplanf) {
saveWisdom('d');
fftw_destroy_plan(m_dplanf); fftw_destroy_plan(m_dplanf);
fftw_destroy_plan(m_dplani); fftw_destroy_plan(m_dplani);
fftw_free(m_dbuf); fftw_free(m_dbuf);
@@ -73,6 +79,7 @@ public:
void initFloat() { void initFloat() {
if (m_fplanf) return; if (m_fplanf) return;
loadWisdom('f');
m_fbuf = (float *)fftw_malloc(m_size * sizeof(float)); m_fbuf = (float *)fftw_malloc(m_size * sizeof(float));
m_fpacked = (fftwf_complex *)fftw_malloc m_fpacked = (fftwf_complex *)fftw_malloc
((m_size/2 + 1) * sizeof(fftwf_complex)); ((m_size/2 + 1) * sizeof(fftwf_complex));
@@ -84,6 +91,7 @@ public:
void initDouble() { void initDouble() {
if (m_dplanf) return; if (m_dplanf) return;
loadWisdom('d');
m_dbuf = (double *)fftw_malloc(m_size * sizeof(double)); m_dbuf = (double *)fftw_malloc(m_size * sizeof(double));
m_dpacked = (fftw_complex *)fftw_malloc m_dpacked = (fftw_complex *)fftw_malloc
((m_size/2 + 1) * sizeof(fftw_complex)); ((m_size/2 + 1) * sizeof(fftw_complex));
@@ -93,6 +101,35 @@ public:
(m_size, m_dpacked, m_dbuf, FFTW_MEASURE); (m_size, m_dpacked, m_dbuf, FFTW_MEASURE);
} }
// Import previously saved FFTW wisdom for the given precision tag
// ('f' or 'd') by calling wisdom() in load mode.
void loadWisdom(char type)
{
    const bool save = false;
    wisdom(save, type);
}
// Export the current FFTW wisdom for the given precision tag
// ('f' or 'd') by calling wisdom() in save mode.
void saveWisdom(char type)
{
    const bool save = true;
    wisdom(save, type);
}
// Load or save FFTW wisdom from/to the file
// "$HOME/.rubberband.wisdom.<type>".
//
// save: true to export the current wisdom to the file, false to
//       import wisdom from it.
// type: 'f' selects the fftwf_ (float) wisdom functions, 'd' the
//       fftw_ (double) ones.  Any other value is ignored.
//
// This is strictly best-effort: if HOME is unset, the path does not
// fit the buffer, or the file cannot be opened, the call silently
// does nothing.
void wisdom(bool save, char type) {

    // Reject unknown types before touching the filesystem: opening
    // with "wb" would otherwise create or truncate a wisdom file that
    // we then never write to.
    if (type != 'f' && type != 'd') return;

    const char *home = getenv("HOME");
    if (!home) return;

    char fn[256];
    int len = snprintf(fn, sizeof(fn), "%s/%s.%c",
                       home, ".rubberband.wisdom", type);

    // A negative result is an encoding error; a result >= the buffer
    // size means the path was truncated and would name the wrong file.
    if (len < 0 || len >= int(sizeof(fn))) return;

    FILE *f = fopen(fn, save ? "wb" : "rb");
    if (!f) return;

    if (save) {
        if (type == 'f') fftwf_export_wisdom_to_file(f);
        else fftw_export_wisdom_to_file(f);
    } else {
        // A failed import is not an error for us: the plans are simply
        // measured from scratch.
        if (type == 'f') fftwf_import_wisdom_from_file(f);
        else fftw_import_wisdom_from_file(f);
    }

    fclose(f);
}
void packFloat(float *re, float *im) { void packFloat(float *re, float *im) {
for (unsigned int i = 0; i <= m_size/2; ++i) { for (unsigned int i = 0; i <= m_size/2; ++i) {
m_fpacked[i][0] = re[i]; m_fpacked[i][0] = re[i];
@@ -508,14 +545,14 @@ FFT::FFT(unsigned int size)
break; break;
case 1: case 1:
std::cerr << "FFT::FFT(" << size << "): using FFTW3 implementation" // std::cerr << "FFT::FFT(" << size << "): using FFTW3 implementation"
<< std::endl; // << std::endl;
d = new D_FFTW(size); d = new D_FFTW(size);
break; break;
default: default:
std::cerr << "FFT::FFT(" << size << "): using built-in implementation" // std::cerr << "FFT::FFT(" << size << "): using built-in implementation"
<< std::endl; // << std::endl;
d = new D_Cross(size); d = new D_Cross(size);
break; break;
} }

View File

@@ -52,8 +52,8 @@ Resampler::D::D(Quality quality, size_t channels, size_t maxBufferSize) :
m_iinsize(0), m_iinsize(0),
m_ioutsize(0) m_ioutsize(0)
{ {
std::cerr << "Resampler::Resampler: using libsamplerate implementation" // std::cerr << "Resampler::Resampler: using libsamplerate implementation"
<< std::endl; // << std::endl;
int err = 0; int err = 0;
m_src = src_new(quality == Best ? SRC_SINC_BEST_QUALITY : m_src = src_new(quality == Best ? SRC_SINC_BEST_QUALITY :
@@ -122,7 +122,7 @@ Resampler::D::resample(float **in, float **out, size_t incount, float ratio,
//!!! check err, respond appropriately //!!! check err, respond appropriately
if (m_channels > 1) { if (m_channels > 1) {
for (size_t i = 0; i < data.output_frames_gen; ++i) { for (int i = 0; i < data.output_frames_gen; ++i) {
for (size_t c = 0; c < m_channels; ++c) { for (size_t c = 0; c < m_channels; ++c) {
out[c][i] = m_iout[i * m_channels + c]; out[c][i] = m_iout[i * m_channels + c];
} }

View File

@@ -179,6 +179,11 @@ RubberBandStretcher::setDebugLevel(int level)
m_d->setDebugLevel(level); m_d->setDebugLevel(level);
} }
// Static counterpart of setDebugLevel(): hands the level to the
// implementation class itself rather than to this object's m_d.
void
RubberBandStretcher::setDefaultDebugLevel(int level)
{
    RubberBandStretcher::Impl::setDefaultDebugLevel(level);
}
} }

View File

@@ -32,10 +32,10 @@ StretchCalculator::StretchCalculator(size_t sampleRate,
m_divergence(0), m_divergence(0),
m_recovery(0), m_recovery(0),
m_prevRatio(1.0), m_prevRatio(1.0),
m_wasTransient(false), m_transientAmnesty(0),
m_useHardPeaks(useHardPeaks) m_useHardPeaks(useHardPeaks)
{ {
std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl; // std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
} }
StretchCalculator::~StretchCalculator() StretchCalculator::~StretchCalculator()
@@ -47,38 +47,6 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
const std::vector<float> &phaseResetDf, const std::vector<float> &phaseResetDf,
const std::vector<float> &stretchDf) const std::vector<float> &stretchDf)
{ {
// Method:
//!!! This description is out of date.
// 1. Pre-process the df array, and for each (say) one second's
// worth of values, calculate the number of peaks that would
// qualify for phase reset given the default threshold. Then
// reduce or increase the threshold by stages until that number is
// in a sensible range (say 1-10 peaks per second -- the low end
// is harder to estimate than the high end, so it may be better to
// start with a high sensitivity and reduce it).
// 2. Record the positions of peaks, and separately the positions
// of those peaks that qualify for reset using the sliding
// threshold window. Don't permit two locked peaks within a very
// short time frame (e.g. 30-50ms).
// 3. Map each of the locked peaks (or any peaks that are over a
// given intensity?), as well as the start and end points, to a
// proportionate position in the newly stretched array so as to
// ensure that their timing is strictly "correct".
// 4. Calculate how much time is left in the stretch total, after
// each of the locked chunks has been allocated its static
// allowance. Also count the non-locked chunks.
// 5. For each region between two locked chunks, calculate the
// number of samples to allocate that region given the time
// available for stretch and the number of non-locked chunks.
// Then divvy them up... how exactly?
assert(phaseResetDf.size() == stretchDf.size()); assert(phaseResetDf.size() == stretchDf.size());
m_lastPeaks = findPeaks(phaseResetDf); m_lastPeaks = findPeaks(phaseResetDf);
@@ -89,13 +57,10 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
size_t outputDuration = lrint(inputDuration * ratio); size_t outputDuration = lrint(inputDuration * ratio);
std::cerr << "debug level: " << m_debugLevel << std::endl;
if (m_debugLevel > 0) { if (m_debugLevel > 0) {
std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration; std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration;
} }
//!!! round down?
outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio); outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio);
if (m_debugLevel > 0) { if (m_debugLevel > 0) {
@@ -103,41 +68,32 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
std::cerr << ", df size " << phaseResetDf.size() << std::endl; std::cerr << ", df size " << phaseResetDf.size() << std::endl;
} }
// size_t stretchable = outputDuration - lockCount * m_increment;
std::vector<size_t> fixedAudioChunks; std::vector<size_t> fixedAudioChunks;
for (size_t i = 0; i < peaks.size(); ++i) { for (size_t i = 0; i < peaks.size(); ++i) {
fixedAudioChunks.push_back fixedAudioChunks.push_back
//!!! this should be rounding down, shouldn't it? not lrint?
(lrint((double(peaks[i].chunk) * outputDuration) / totalCount)); (lrint((double(peaks[i].chunk) * outputDuration) / totalCount));
} }
// size_t lockIndex = 0;
if (m_debugLevel > 1) { if (m_debugLevel > 1) {
std::cerr << "have " << peaks.size() << " fixed positions" << std::endl; std::cerr << "have " << peaks.size() << " fixed positions" << std::endl;
} }
size_t totalInput = 0, totalOutput = 0; size_t totalInput = 0, totalOutput = 0;
// so for each inter-lock region, we want to take the number of // For each region between two consecutive time sync points, we
// output chunks to be allocated and the detection function values // want to take the number of output chunks to be allocated and
// within the range, and produce a series of increments that sum // the detection function values within the range, and produce a
// to the number of output chunks, such that each increment is // series of increments that sum to the number of output chunks,
// displaced from the input increment by an amount inversely // such that each increment is displaced from the input increment
// proportional to the magnitude of the detection function at that // by an amount inversely proportional to the magnitude of the
// input step. Ideally the detection function would have been // stretch detection function at that input step.
// somewhat smoothed for this purpose but we'll start raw.
//!!! Actually, we would possibly be better off using a fixed
// smooth curve than the detection function itself.
size_t regionTotalChunks = 0; size_t regionTotalChunks = 0;
for (size_t i = 0; i <= peaks.size(); ++i) { for (size_t i = 0; i <= peaks.size(); ++i) {
size_t regionStart, regionStartChunk, regionEnd, regionEndChunk; size_t regionStart, regionStartChunk, regionEnd, regionEndChunk;
bool phaseLock = false; bool phaseReset = false;
if (i == 0) { if (i == 0) {
regionStartChunk = 0; regionStartChunk = 0;
@@ -145,7 +101,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
} else { } else {
regionStartChunk = peaks[i-1].chunk; regionStartChunk = peaks[i-1].chunk;
regionStart = fixedAudioChunks[i-1]; regionStart = fixedAudioChunks[i-1];
phaseLock = peaks[i-1].hard; phaseReset = peaks[i-1].hard;
} }
if (i == peaks.size()) { if (i == peaks.size()) {
@@ -172,7 +128,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
dfRegion = smoothDF(dfRegion); dfRegion = smoothDF(dfRegion);
std::vector<int> regionIncrements = distributeRegion std::vector<int> regionIncrements = distributeRegion
(dfRegion, regionDuration, ratio, phaseLock); (dfRegion, regionDuration, ratio, phaseReset);
size_t totalForRegion = 0; size_t totalForRegion = 0;
@@ -180,7 +136,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
int incr = regionIncrements[j]; int incr = regionIncrements[j];
if (j == 0 && phaseLock) increments.push_back(-incr); if (j == 0 && phaseReset) increments.push_back(-incr);
else increments.push_back(incr); else increments.push_back(incr);
if (incr > 0) totalForRegion += incr; if (incr > 0) totalForRegion += incr;
@@ -200,6 +156,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << ceil(totalInput * ratio) << std::endl; std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << ceil(totalInput * ratio) << std::endl;
std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl; std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl;
} }
return increments; return increments;
} }
@@ -210,13 +167,15 @@ StretchCalculator::calculateSingle(double ratio,
{ {
bool isTransient = false; bool isTransient = false;
//!!! We want to ensure, as close as possible, that the lock // We want to ensure, as close as possible, that the phase reset
// points appear at _exactly_ the right audio frame numbers // points appear at _exactly_ the right audio frame numbers.
// In principle, the threshold depends on chunk size: larger chunk
// sizes need higher thresholds. Since chunk size depends on
// ratio, I suppose we could in theory calculate the threshold
// from the ratio directly. For the moment we're happy if it
// works well in common situations.
//!!! depends on chunk size. larger chunk sizes need higher
//thresholds. since chunk size depends on ratio, I suppose we
//could in theory calculate the threshold from the ratio directly.
//For now we just frig it to work OK for a couple of common cases
float transientThreshold = 0.35; float transientThreshold = 0.35;
if (ratio > 1) transientThreshold = 0.25; if (ratio > 1) transientThreshold = 0.25;
@@ -231,13 +190,17 @@ StretchCalculator::calculateSingle(double ratio,
m_prevDf = df; m_prevDf = df;
if (isTransient && !m_wasTransient) { if (isTransient && m_transientAmnesty == 0) {
if (m_debugLevel > 1) { if (m_debugLevel > 1) {
std::cerr << "StretchCalculator::calculateSingle: transient found at " std::cerr << "StretchCalculator::calculateSingle: transient found at "
<< inputDurationSoFar << std::endl; << inputDurationSoFar << std::endl;
} }
m_divergence += m_increment - (m_increment * ratio); m_divergence += m_increment - (m_increment * ratio);
m_wasTransient = true;
// as in offline mode, 0.05 sec approx min between transients
m_transientAmnesty =
lrint(ceil(double(m_sampleRate) / (20 * double(m_increment))));
m_recovery = m_divergence / ((m_sampleRate / 10.0) / m_increment); m_recovery = m_divergence / ((m_sampleRate / 10.0) / m_increment);
return -m_increment; return -m_increment;
} }
@@ -247,17 +210,16 @@ StretchCalculator::calculateSingle(double ratio,
m_prevRatio = ratio; m_prevRatio = ratio;
} }
//!!! want transient amnesty as above (hard peak amnesty) if (m_transientAmnesty > 0) --m_transientAmnesty;
m_wasTransient = false;
int incr = lrint(m_increment * ratio - m_recovery); int incr = lrint(m_increment * ratio - m_recovery);
if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) { if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", "; std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", ";
} }
if (incr < (m_increment * ratio) / 2) { if (incr < lrint((m_increment * ratio) / 2)) {
incr = (m_increment * ratio) / 2; incr = lrint((m_increment * ratio) / 2);
} else if (incr > m_increment * ratio * 2) { } else if (incr > lrint(m_increment * ratio * 2)) {
incr = m_increment * ratio * 2; incr = lrint(m_increment * ratio * 2);
} }
double divdiff = (m_increment * ratio) - incr; double divdiff = (m_increment * ratio) - incr;
@@ -288,58 +250,112 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
{ {
std::vector<float> df = smoothDF(rawDf); std::vector<float> df = smoothDF(rawDf);
// We distinguish between "soft" and "hard" peaks. A soft peak is
// simply the result of peak-picking on the smoothed onset
// detection function, and it represents any (strong-ish) onset.
// We aim to ensure always that soft peaks are placed at the
// correct position in time. A hard peak is where there is a very
// rapid rise in detection function, and it presumably represents
// a more broadband, noisy transient. For these we perform a
// phase reset (if in the appropriate mode), and we locate the
// reset at the first point where we notice enough of a rapid
// rise, rather than necessarily at the peak itself, in order to
// preserve the shape of the transient.
std::set<size_t> hardPeakCandidates; std::set<size_t> hardPeakCandidates;
std::set<size_t> softPeakCandidates; std::set<size_t> softPeakCandidates;
if (m_useHardPeaks) { if (m_useHardPeaks) {
//!!! this should depend on duration based on output increment surely? // 0.05 sec approx min between hard peaks
size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) / size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) /
(20 * double(m_increment)))); // 0.05 sec ish (20 * double(m_increment))));
// size_t hardPeakAmnesty = 5;
size_t prevHardPeak = 0; size_t prevHardPeak = 0;
if (m_debugLevel > 1) {
std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl; std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
}
for (size_t i = 1; i + 1 < df.size(); ++i) { for (size_t i = 1; i + 1 < df.size(); ++i) {
//!!! this ratio configurable? dependent on chunk size and sr?
if (df[i] < 0.1) continue; if (df[i] < 0.1) continue;
if (df[i] <= df[i-1] * 1.2) continue; if (df[i] <= df[i-1] * 1.1) continue;
if (df[i] < 0.22) continue;
if (df[i] > df[i-1] * 1.4 ||
(df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
df[i] > 0.4) {
if (!hardPeakCandidates.empty() && if (!hardPeakCandidates.empty() &&
i < prevHardPeak + hardPeakAmnesty) { i < prevHardPeak + hardPeakAmnesty) {
continue; continue;
} }
bool hard = (df[i] > 0.4);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > absolute " << 0.4
<< std::endl;
}
if (!hard) {
hard = (df[i] > df[i-1] * 1.4);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.4"
<< std::endl;
}
}
if (!hard && i > 1) {
hard = (df[i] > df[i-1] * 1.2 &&
df[i-1] > df[i-2] * 1.2);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.2 and "
<< df[i-1] << " > prev " << df[i-2] << " * 1.2"
<< std::endl;
}
}
if (!hard && i > 2) {
// have already established that df[i] > df[i-1] * 1.1
hard = (df[i] > 0.3 &&
df[i-1] > df[i-2] * 1.1 &&
df[i-2] > df[i-3] * 1.1);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.1 and "
<< df[i-1] << " > prev " << df[i-2] << " * 1.1 and "
<< df[i-2] << " > prev " << df[i-3] << " * 1.1"
<< std::endl;
}
}
if (!hard) continue;
// (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
// df[i] > 0.4) {
size_t peakLocation = i; size_t peakLocation = i;
if (i + 1 < rawDf.size() && if (i + 1 < rawDf.size() &&
rawDf[i + 1] > rawDf[i] * 1.4) { rawDf[i + 1] > rawDf[i] * 1.4) {
++peakLocation; ++peakLocation;
}
if (m_debugLevel > 1) { if (m_debugLevel > 1) {
std::cerr << "hard peak at " << peakLocation << " (" << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << ")" << std::endl; std::cerr << "pushing hard peak forward to " << peakLocation << ": " << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << std::endl;
} }
}
hardPeakCandidates.insert(peakLocation); hardPeakCandidates.insert(peakLocation);
prevHardPeak = peakLocation; prevHardPeak = peakLocation;
} }
} }
}
//!!! we don't yet do the right thing with soft peaks. if
//!useHardPeaks, we should be resetting on soft peaks; if
//useHardPeaks, we should be ignoring soft peaks if they occur
//shortly after hard ones, otherwise either resetting on them, or
//at least making sure they fall at the correct sample time
// int mediansize = lrint(ceil(double(m_sampleRate) /
// (4 * double(m_increment)))); // 0.25 sec ish
size_t medianmaxsize = lrint(ceil(double(m_sampleRate) / size_t medianmaxsize = lrint(ceil(double(m_sampleRate) /
double(m_increment))); // 1 sec ish double(m_increment))); // 1 sec ish
// int mediansize = lrint(ceil(double(m_sampleRate) /
// (2 * double(m_increment)))); // 0.5 sec ish
if (m_debugLevel > 1) { if (m_debugLevel > 1) {
std::cerr << "mediansize = " << medianmaxsize << std::endl; std::cerr << "mediansize = " << medianmaxsize << std::endl;
@@ -382,16 +398,13 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
if (mediansize < 2) { if (mediansize < 2) {
if (mediansize > medianmaxsize) { // absurd, but never mind that if (mediansize > medianmaxsize) { // absurd, but never mind that
// std::cerr << "(<2) pop front ";
medianwin.pop_front(); medianwin.pop_front();
} }
if (nextDf < df.size()) { if (nextDf < df.size()) {
// std::cerr << "(<2) push back " << df[nextDf] << " ";
medianwin.push_back(df[nextDf]); medianwin.push_back(df[nextDf]);
} else { } else {
medianwin.push_back(0); medianwin.push_back(0);
} }
// std::cerr << "(<2) continue" << std::endl;
continue; continue;
} }
@@ -411,16 +424,16 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
if (index == sorted.size()-1 && index > 0) --index; if (index == sorted.size()-1 && index > 0) --index;
float thresh = sorted[index]; float thresh = sorted[index];
if (m_debugLevel > 2) { // if (m_debugLevel > 2) {
// std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl; // std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl;
if (medianwin[middle] == 0.f) { // if (medianwin[middle] == 0.f) {
// std::cerr << "contents: "; // std::cerr << "contents: ";
for (size_t j = 0; j < medianwin.size(); ++j) { // for (size_t j = 0; j < medianwin.size(); ++j) {
// std::cerr << medianwin[j] << " "; // std::cerr << medianwin[j] << " ";
} // }
// std::cerr << std::endl; // std::cerr << std::endl;
} // }
} // }
if (medianwin[middle] > thresh && if (medianwin[middle] > thresh &&
medianwin[middle] > medianwin[middle-1] && medianwin[middle] > medianwin[middle-1] &&
@@ -439,31 +452,21 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
} }
} }
//!!! we should distinguish between soft peaks (any found
//using the above method) and hard peaks, which also show
//a very rapid rise in detection function prior to the
//peak (the first value after the rise is not necessarily
//the peak itself, but it is probably where we should
//locate the phase reset). For hard peaks we need to
//reset in time to preserve the shape of the transient
//(unless some option is set to soft mode), for soft peaks
//we just want to avoid poor timing positioning so we
//build up to the reset at the exact peak moment.
// size_t peak = i + maxindex - mediansize;
size_t peak = i + maxindex - middle; size_t peak = i + maxindex - middle;
// std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl; // std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl;
// if (peak > 0) --peak; //!!! that's a fudge
if (softPeakCandidates.empty() || lastSoftPeak != peak) { if (softPeakCandidates.empty() || lastSoftPeak != peak) {
if (m_debugLevel > 1) { if (m_debugLevel > 1) {
std::cerr << "soft peak at " << peak << " (" << peak * m_increment << "): " std::cerr << "soft peak at " << peak << " ("
<< medianwin[middle] << " > " << thresh << " and " << peak * m_increment << "): "
<< medianwin[middle] << " > " << medianwin[middle-1] << " and " << medianwin[middle] << " > "
<< medianwin[middle] << " > " << medianwin[middle+1] << thresh << " and "
<< medianwin[middle]
<< " > " << medianwin[middle-1] << " and "
<< medianwin[middle]
<< " > " << medianwin[middle+1]
<< std::endl; << std::endl;
} }
@@ -484,57 +487,66 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
} else if (softPeakAmnesty > 0) --softPeakAmnesty; } else if (softPeakAmnesty > 0) --softPeakAmnesty;
// std::cerr << "i = " << i << " ";
if (mediansize >= medianmaxsize) { if (mediansize >= medianmaxsize) {
// std::cerr << "(>= " << medianmaxsize << ") pop front ";
medianwin.pop_front(); medianwin.pop_front();
} }
if (nextDf < df.size()) { if (nextDf < df.size()) {
// std::cerr << "(" << nextDf << "<" << df.size() << ") push back " << df[nextDf] << " ";
medianwin.push_back(df[nextDf]); medianwin.push_back(df[nextDf]);
} else { } else {
medianwin.push_back(0); medianwin.push_back(0);
} }
// std::cerr << "continue" << std::endl;
} }
std::vector<Peak> peaks; std::vector<Peak> peaks;
//!!!
// if (!softPeakCandidates.empty()) {
// std::cerr << "clearing " << softPeakCandidates.size() << " soft peak candidates" << std::endl;
// }
// softPeakCandidates.clear();
while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) { while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) {
bool haveHardPeak = !hardPeakCandidates.empty(); bool haveHardPeak = !hardPeakCandidates.empty();
bool haveSoftPeak = !softPeakCandidates.empty(); bool haveSoftPeak = !softPeakCandidates.empty();
size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0); size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0);
size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0); size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0);
Peak peak; Peak peak;
peak.hard = false; peak.hard = false;
peak.chunk = softPeak; peak.chunk = softPeak;
bool ignore = false;
if (haveHardPeak && if (haveHardPeak &&
(!haveSoftPeak || hardPeak <= softPeak)) { (!haveSoftPeak || hardPeak <= softPeak)) {
if (m_debugLevel > 2) { if (m_debugLevel > 2) {
std::cerr << "Hard peak: " << hardPeak << std::endl; std::cerr << "Hard peak: " << hardPeak << std::endl;
} }
peak.hard = true; peak.hard = true;
peak.chunk = hardPeak; peak.chunk = hardPeak;
hardPeakCandidates.erase(hardPeakCandidates.begin()); hardPeakCandidates.erase(hardPeakCandidates.begin());
} else { } else {
if (m_debugLevel > 2) { if (m_debugLevel > 2) {
std::cerr << "Soft peak: " << softPeak << std::endl; std::cerr << "Soft peak: " << softPeak << std::endl;
} }
if (!peaks.empty() &&
peaks[peaks.size()-1].hard &&
peaks[peaks.size()-1].chunk + 3 >= softPeak) {
if (m_debugLevel > 2) {
std::cerr << "(ignoring, as we just had a hard peak)"
<< std::endl;
}
ignore = true;
}
} }
if (haveSoftPeak && peak.chunk == softPeak) { if (haveSoftPeak && peak.chunk == softPeak) {
softPeakCandidates.erase(softPeakCandidates.begin()); softPeakCandidates.erase(softPeakCandidates.begin());
} }
if (!ignore) {
peaks.push_back(peak); peaks.push_back(peak);
} }
}
return peaks; return peaks;
} }
@@ -551,10 +563,6 @@ StretchCalculator::smoothDF(const std::vector<float> &df)
total += df[i]; ++count; total += df[i]; ++count;
if (i+1 < df.size()) { total += df[i+1]; ++count; } if (i+1 < df.size()) { total += df[i+1]; ++count; }
float mean = total / count; float mean = total / count;
// if (isnan(mean)) {
// std::cerr << "ERROR: mean at " << i << " (of " << df.size() << ") is NaN: dfs are: "
// << df[i-1] << ", " << df[i] << ", " << df[i+1] << std::endl;
// }
smoothedDF.push_back(mean); smoothedDF.push_back(mean);
} }
@@ -574,9 +582,9 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
// the region, we should set all the values up to that point to // the region, we should set all the values up to that point to
// the same value as the peak. // the same value as the peak.
//!!! this is not subtle enough, especially if the region is long // (This might not be subtle enough, especially if the region is
//-- we want a bound that corresponds to acoustic perception of // long -- we want a bound that corresponds to acoustic perception
//the audible bounce // of the audible bounce.)
for (size_t i = 1; i < df.size()/2; ++i) { for (size_t i = 1; i < df.size()/2; ++i) {
if (df[i] < df[i-1]) { if (df[i] < df[i-1]) {
@@ -600,16 +608,15 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
// tending back towards the maximum df, so that the stretchiness // tending back towards the maximum df, so that the stretchiness
// reduces at the end of the stretched region. // reduces at the end of the stretched region.
int reducedRegion = (0.1 * m_sampleRate) / m_increment; int reducedRegion = lrint((0.1 * m_sampleRate) / m_increment);
if (reducedRegion > df.size()/5) reducedRegion = df.size()/5; if (reducedRegion > int(df.size()/5)) reducedRegion = df.size()/5;
for (size_t i = 0; i < reducedRegion; ++i) { for (int i = 0; i < reducedRegion; ++i) {
size_t index = df.size() - reducedRegion + i; size_t index = df.size() - reducedRegion + i;
df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion; df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion;
} }
long toAllot = long(duration) - long(m_increment * df.size()); long toAllot = long(duration) - long(m_increment * df.size());
// bool negative = (toAllot < 0);
if (m_debugLevel > 1) { if (m_debugLevel > 1) {
std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", toAllot " << toAllot << std::endl; std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", toAllot " << toAllot << std::endl;
@@ -617,20 +624,25 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
size_t totalIncrement = 0; size_t totalIncrement = 0;
//!!! we need to place limits on the amount of displacement per // We place limits on the amount of displacement per chunk. if
//chunk. if ratio < 0, no increment should be larger than // ratio < 1, no increment should be larger than increment*ratio
//increment*ratio or smaller than increment*ratio/2; if ratio > 0, // or smaller than increment*ratio/2; if ratio > 1, none should be
//none should be smaller than increment*ratio or larger than // smaller than increment*ratio or larger than increment*ratio*2.
//increment*ratio*2. We need to enforce this in the assignment of // We need to enforce this in the assignment of displacements to
//displacements to allotments, not by trying to respond if // allotments, not by trying to respond if something turns out
//something turns out wrong // wrong.
//!!! ratio is only provided to this function for the purposes of // Note that the ratio is only provided to this function for the
//establishing this bound to the displacement // purposes of establishing this bound to the displacement.
// so if maxDisplacement / totalDisplacement > increment * ratio*2 - increment (for ratio > 1) // so if
// or maxDisplacement / totalDisplacement < increment * ratio/2 (for ratio < 1) // maxDisplacement / totalDisplacement > increment * ratio*2 - increment
// then we need to adjust... what? // (for ratio > 1)
// or
// maxDisplacement / totalDisplacement < increment * ratio/2
// (for ratio < 1)
// then we need to adjust and accommodate
bool acceptableSquashRange = false; bool acceptableSquashRange = false;
@@ -663,7 +675,7 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
int extremeIncrement = m_increment + lrint((toAllot * maxDisplacement) / totalDisplacement); int extremeIncrement = m_increment + lrint((toAllot * maxDisplacement) / totalDisplacement);
if (ratio < 1.0) { if (ratio < 1.0) {
if (extremeIncrement > lrint(ceil(m_increment * ratio))) { if (extremeIncrement > lrint(ceil(m_increment * ratio))) {
std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (I thought this couldn't happen?)" << std::endl; std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (this should not happen)" << std::endl;
} else if (extremeIncrement < (m_increment * ratio) / 2) { } else if (extremeIncrement < (m_increment * ratio) / 2) {
if (m_debugLevel > 0) { if (m_debugLevel > 0) {
std::cerr << "WARNING: extreme increment " << extremeIncrement << " < " << (m_increment * ratio) / 2 << std::endl; std::cerr << "WARNING: extreme increment " << extremeIncrement << " < " << (m_increment * ratio) / 2 << std::endl;
@@ -684,19 +696,8 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
if (!acceptableSquashRange) { if (!acceptableSquashRange) {
// Need to make maxDisplacement smaller as a proportion of // Need to make maxDisplacement smaller as a proportion of
// the total displacement, yet ensure that the // the total displacement, yet ensure that the
// displacements still sum to the total. How? // displacements still sum to the total.
// std::cerr << "Adjusting df values by " << maxDf/10 << "..." << std::endl;
// std::cerr << "now: ";
// for (size_t i = 0; i < df.size(); ++i) {
// df[i] += maxDf/10;
// std::cerr << df[i] << " ";
// }
// std::cerr << std::endl;
adj += maxDf/10; adj += maxDf/10;
//...
} }
} }
@@ -729,7 +730,9 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
int increment = m_increment + allotment; int increment = m_increment + allotment;
if (increment <= 0) { if (increment <= 0) {
//!!! this is a serious problem, the allocation is quite wrong if it allows increment to diverge so far from the input increment // this is a serious problem, the allocation is quite
// wrong if it allows increment to diverge so far from the
// input increment
std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl; std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl;
increment = 0; increment = 0;
allotment = increment - m_increment; allotment = increment - m_increment;

View File

@@ -81,7 +81,7 @@ protected:
double m_divergence; double m_divergence;
float m_recovery; float m_recovery;
float m_prevRatio; float m_prevRatio;
bool m_wasTransient; int m_transientAmnesty; // only in RT mode; handled differently offline
int m_debugLevel; int m_debugLevel;
bool m_useHardPeaks; bool m_useHardPeaks;

View File

@@ -39,7 +39,7 @@ RubberBandStretcher::Impl::ChannelData::construct(const std::set<size_t> &window
size_t realSize = maxSize/2 + 1; // size of the real "half" of freq data size_t realSize = maxSize/2 + 1; // size of the real "half" of freq data
std::cerr << "ChannelData::construct([" << windowSizes.size() << "], " << maxSize << ", " << outbufSize << ")" << std::endl; // std::cerr << "ChannelData::construct([" << windowSizes.size() << "], " << maxSize << ", " << outbufSize << ")" << std::endl;
if (outbufSize < maxSize) outbufSize = maxSize; if (outbufSize < maxSize) outbufSize = maxSize;
@@ -97,7 +97,7 @@ RubberBandStretcher::Impl::ChannelData::setWindowSize(size_t windowSize)
size_t oldSize = inbuf->getSize(); size_t oldSize = inbuf->getSize();
size_t realSize = windowSize/2 + 1; size_t realSize = windowSize/2 + 1;
std::cerr << "ChannelData::setWindowSize(" << windowSize << ") [from " << oldSize << "]" << std::endl; // std::cerr << "ChannelData::setWindowSize(" << windowSize << ") [from " << oldSize << "]" << std::endl;
if (oldSize >= windowSize) { if (oldSize >= windowSize) {
@@ -203,7 +203,7 @@ RubberBandStretcher::Impl::ChannelData::setOutbufSize(size_t outbufSize)
{ {
size_t oldSize = outbuf->getSize(); size_t oldSize = outbuf->getSize();
std::cerr << "ChannelData::setOutbufSize(" << outbufSize << ") [from " << oldSize << "]" << std::endl; // std::cerr << "ChannelData::setOutbufSize(" << outbufSize << ") [from " << oldSize << "]" << std::endl;
if (oldSize < outbufSize) { if (oldSize < outbufSize) {

View File

@@ -35,8 +35,15 @@ using std::min;
namespace RubberBand { namespace RubberBand {
static const size_t defaultIncrement = 256; const size_t
static const size_t defaultWindowSize = 2048; RubberBandStretcher::Impl::m_defaultIncrement = 256;
const size_t
RubberBandStretcher::Impl::m_defaultWindowSize = 2048;
int
RubberBandStretcher::Impl::m_defaultDebugLevel = 0;
RubberBandStretcher::Impl::Impl(RubberBandStretcher *stretcher, RubberBandStretcher::Impl::Impl(RubberBandStretcher *stretcher,
size_t sampleRate, size_t sampleRate,
@@ -48,15 +55,15 @@ RubberBandStretcher::Impl::Impl(RubberBandStretcher *stretcher,
m_channels(channels), m_channels(channels),
m_timeRatio(initialTimeRatio), m_timeRatio(initialTimeRatio),
m_pitchScale(initialPitchScale), m_pitchScale(initialPitchScale),
m_windowSize(defaultWindowSize), m_windowSize(m_defaultWindowSize),
m_increment(defaultIncrement), m_increment(m_defaultIncrement),
m_outbufSize(defaultWindowSize * 2), m_outbufSize(m_defaultWindowSize * 2),
m_maxProcessSize(defaultWindowSize), m_maxProcessSize(m_defaultWindowSize),
m_expectedInputDuration(0), m_expectedInputDuration(0),
m_threaded(false), m_threaded(false),
m_realtime(false), m_realtime(false),
m_options(options), m_options(options),
m_debugLevel(1), m_debugLevel(m_defaultDebugLevel),
m_mode(JustCreated), m_mode(JustCreated),
m_window(0), m_window(0),
m_studyFFT(0), m_studyFFT(0),
@@ -69,20 +76,32 @@ RubberBandStretcher::Impl::Impl(RubberBandStretcher *stretcher,
m_freq0(600), m_freq0(600),
m_freq1(1200), m_freq1(1200),
m_freq2(12000), m_freq2(12000),
m_baseWindowSize(defaultWindowSize) m_baseWindowSize(m_defaultWindowSize)
{ {
cerr << "RubberBandStretcher::Impl::Impl: options = " << options << endl; if (m_debugLevel > 0) {
cerr << "RubberBandStretcher::Impl::Impl: rate = " << m_stretcher->m_sampleRate << ", options = " << options << endl;
}
// Window size will vary according to the audio sample rate, but
// we don't let it drop below the 48k default
m_rateMultiple = float(m_stretcher->m_sampleRate) / 48000.f;
if (m_rateMultiple < 1.f) m_rateMultiple = 1.f;
m_baseWindowSize = roundUp(int(m_defaultWindowSize * m_rateMultiple));
if ((options & OptionWindowShort) || (options & OptionWindowLong)) { if ((options & OptionWindowShort) || (options & OptionWindowLong)) {
if ((options & OptionWindowShort) && (options & OptionWindowLong)) { if ((options & OptionWindowShort) && (options & OptionWindowLong)) {
cerr << "RubberBandStretcher::Impl::Impl: Cannot specify OptionWindowLong and OptionWindowShort together; falling back to OptionWindowStandard" << endl; cerr << "RubberBandStretcher::Impl::Impl: Cannot specify OptionWindowLong and OptionWindowShort together; falling back to OptionWindowStandard" << endl;
} else if (options & OptionWindowShort) { } else if (options & OptionWindowShort) {
m_baseWindowSize = defaultWindowSize / 2; m_baseWindowSize = m_baseWindowSize / 2;
if (m_debugLevel > 0) {
cerr << "setting baseWindowSize to " << m_baseWindowSize << endl; cerr << "setting baseWindowSize to " << m_baseWindowSize << endl;
}
} else if (options & OptionWindowLong) { } else if (options & OptionWindowLong) {
m_baseWindowSize = defaultWindowSize * 2; m_baseWindowSize = m_baseWindowSize * 2;
if (m_debugLevel > 0) {
cerr << "setting baseWindowSize to " << m_baseWindowSize << endl; cerr << "setting baseWindowSize to " << m_baseWindowSize << endl;
} }
}
m_windowSize = m_baseWindowSize; m_windowSize = m_baseWindowSize;
m_outbufSize = m_baseWindowSize * 2; m_outbufSize = m_baseWindowSize * 2;
m_maxProcessSize = m_baseWindowSize; m_maxProcessSize = m_baseWindowSize;
@@ -278,7 +297,7 @@ RubberBandStretcher::Impl::roundUp(size_t value)
void void
RubberBandStretcher::Impl::calculateSizes() RubberBandStretcher::Impl::calculateSizes()
{ {
size_t inputIncrement = defaultIncrement; size_t inputIncrement = m_defaultIncrement;
size_t windowSize = m_baseWindowSize; size_t windowSize = m_baseWindowSize;
size_t outputIncrement; size_t outputIncrement;
@@ -288,7 +307,7 @@ RubberBandStretcher::Impl::calculateSizes()
// use a fixed input increment // use a fixed input increment
inputIncrement = defaultIncrement; inputIncrement = roundUp(int(m_defaultIncrement * m_rateMultiple));
if (r < 1) { if (r < 1) {
outputIncrement = int(floor(inputIncrement * r)); outputIncrement = int(floor(inputIncrement * r));
@@ -340,7 +359,7 @@ RubberBandStretcher::Impl::calculateSizes()
} }
// windowSize can be almost anything, but it can't be greater than // windowSize can be almost anything, but it can't be greater than
// 4 * defaultWindowSize unless ratio is less than 1/1024. // 4 * m_baseWindowSize unless ratio is less than 1/1024.
m_windowSize = windowSize; m_windowSize = windowSize;
m_increment = inputIncrement; m_increment = inputIncrement;
@@ -357,15 +376,6 @@ RubberBandStretcher::Impl::calculateSizes()
cerr << "configure: window size = " << m_windowSize << ", increment = " << m_increment << " (approx output increment = " << int(lrint(m_increment * getEffectiveRatio())) << ")" << endl; cerr << "configure: window size = " << m_windowSize << ", increment = " << m_increment << " (approx output increment = " << int(lrint(m_increment * getEffectiveRatio())) << ")" << endl;
} }
static size_t maxWindowSize = 0;
if (m_windowSize > maxWindowSize) {
//!!!
cerr << "configure: NOTE: max window size so far increased from "
<< maxWindowSize << " to " << m_windowSize << endl;
maxWindowSize = m_windowSize;
}
if (m_windowSize > m_maxProcessSize) { if (m_windowSize > m_maxProcessSize) {
m_maxProcessSize = m_windowSize; m_maxProcessSize = m_windowSize;
} }
@@ -606,22 +616,19 @@ RubberBandStretcher::Impl::getLatency() const
void void
RubberBandStretcher::Impl::setTransientsOption(Options options) RubberBandStretcher::Impl::setTransientsOption(Options options)
{ {
//!!! m_options &= ~(OptionTransientsMixed |
if (options & OptionTransientsSmooth) { OptionTransientsSmooth |
m_options |= OptionTransientsSmooth; OptionTransientsCrisp);
} else { m_options |= options;
m_options &= ~OptionTransientsSmooth;
}
} }
void void
RubberBandStretcher::Impl::setPhaseOption(Options options) RubberBandStretcher::Impl::setPhaseOption(Options options)
{ {
if (options & OptionPhaseIndependent) { m_options &= ~(OptionPhaseAdaptive |
m_options |= OptionPhaseIndependent; OptionPhasePeakLocked |
} else { OptionPhaseIndependent);
m_options &= ~OptionPhaseIndependent; m_options |= options;
}
} }
void void

View File

@@ -80,6 +80,7 @@ public:
void calculateStretch(); void calculateStretch();
void setDebugLevel(int level); void setDebugLevel(int level);
static void setDefaultDebugLevel(int level) { m_defaultDebugLevel = level; }
protected: protected:
RubberBandStretcher *m_stretcher; RubberBandStretcher *m_stretcher;
@@ -174,10 +175,14 @@ protected:
float m_freq2; float m_freq2;
size_t m_baseWindowSize; size_t m_baseWindowSize;
float m_rateMultiple;
void writeOutput(RingBuffer<float> &to, float *from, void writeOutput(RingBuffer<float> &to, float *from,
size_t qty, size_t &outCount, size_t theoreticalOut); size_t qty, size_t &outCount, size_t theoreticalOut);
static int m_defaultDebugLevel;
static const size_t m_defaultIncrement;
static const size_t m_defaultWindowSize;
}; };
} }

View File

@@ -482,25 +482,32 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
cd.freqPeak[0] = 0; cd.freqPeak[0] = 0;
float freq0 = m_freq0; float freq0 = m_freq0;
float freq1 = m_freq1;
float freq2 = m_freq2;
// As the stretch ratio increases, so the frequency thresholds // As the stretch ratio increases, so the frequency thresholds
// for phase lamination should increase. Beyond a ratio of // for phase lamination should increase. Beyond a ratio of
// about 1.5, the threshold should be about 1200Hz; beyond a // about 1.5, the threshold should be about 1200Hz; beyond a
// ratio of 2, we probably want no lamination to happen at all // ratio of 2, we probably want no lamination to happen at all
// by default. This calculation aims for that. // by default. This calculation aims for more or less that.
// We only do this if the phase option is OptionPhaseAdaptive
// (the default), i.e. not Independent or PeakLocked.
//!!! we should only do this if asked to -- and when not if (!(m_options & OptionPhasePeakLocked)) {
//setting f0,f1,f2 explicitly
float r = getEffectiveRatio(); float r = getEffectiveRatio();
if (r > 1) { if (r > 1) {
float rf0 = 600 + (600 * ((r-1)*(r-1)*2)); float rf0 = 600 + (600 * ((r-1)*(r-1)*(r-1)*2));
// std::cerr << "ratio = " << r << ", rf0 = " << rf0 << std::endl; float f1ratio = freq1 / freq0;
float f2ratio = freq2 / freq0;
freq0 = std::max(freq0, rf0); freq0 = std::max(freq0, rf0);
freq1 = freq0 * f1ratio;
freq2 = freq0 * f2ratio;
}
} }
size_t limit0 = lrint((freq0 * m_windowSize) / rate); size_t limit0 = lrint((freq0 * m_windowSize) / rate);
size_t limit1 = lrint((m_freq1 * m_windowSize) / rate); size_t limit1 = lrint((freq1 * m_windowSize) / rate);
size_t limit2 = lrint((m_freq2 * m_windowSize) / rate); size_t limit2 = lrint((freq2 * m_windowSize) / rate);
size_t range = 0; size_t range = 0;
@@ -514,11 +521,6 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
for (size_t i = 0; i <= count; ++i) { for (size_t i = 0; i <= count; ++i) {
double mag = cd.mag[i]; double mag = cd.mag[i];
//!!! N.B. if the stretch ratio is very long, it's generally
//better not to attempt this phase lamination -- stick with
//range==0 throughout.
bool isPeak = true; bool isPeak = true;
for (size_t j = 1; j <= range; ++j) { for (size_t j = 1; j <= range; ++j) {
@@ -587,9 +589,7 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
bool resetThis = phaseReset; bool resetThis = phaseReset;
if (!(m_options & OptionTransientsSmooth) && if (m_options & OptionTransientsMixed) {
!(m_options & OptionTransientsCrisp)) {
// must be OptionTransientsMixed
size_t low = lrint((150 * m_windowSize) / rate); size_t low = lrint((150 * m_windowSize) / rate);
size_t high = lrint((1000 * m_windowSize) / rate); size_t high = lrint((1000 * m_windowSize) / rate);
if (resetThis) { if (resetThis) {
@@ -827,10 +827,11 @@ RubberBandStretcher::Impl::available() const
if (!m_threaded) { if (!m_threaded) {
for (size_t c = 0; c < m_channels; ++c) { for (size_t c = 0; c < m_channels; ++c) {
if (m_channelData[c]->inputSize >= 0) { if (m_channelData[c]->inputSize >= 0) {
cerr << "available: m_done true" << endl; // cerr << "available: m_done true" << endl;
if (m_channelData[c]->inbuf->getReadSpace() > 0) { if (m_channelData[c]->inbuf->getReadSpace() > 0) {
cerr << "calling processChunks(" << c << ") from available" << endl; // cerr << "calling processChunks(" << c << ") from available" << endl;
//!!! do we ever actually do this? if so, this method should not be const //!!! do we ever actually do this? if so, this method should not be const
// ^^^ yes, we do sometimes -- e.g. when fed a very short file
((RubberBandStretcher::Impl *)this)->processChunks(c); ((RubberBandStretcher::Impl *)this)->processChunks(c);
} }
} }

View File

@@ -22,6 +22,9 @@
#include <getopt.h> #include <getopt.h>
// for import and export of FFTW wisdom
#include <fftw3.h>
using namespace std; using namespace std;
using namespace RubberBand; using namespace RubberBand;
@@ -32,15 +35,19 @@ int main(int argc, char **argv)
double ratio = 1.0; double ratio = 1.0;
double pitchshift = 1.0; double pitchshift = 1.0;
double frequencyshift = 1.0; double frequencyshift = 1.0;
int debug = 1; int debug = 0;
bool realtime = false; bool realtime = false;
bool precise = false; bool precise = false;
bool threaded = true; bool threaded = true;
bool peaklock = true; bool peaklock = true;
bool longwin = false; bool longwin = false;
bool shortwin = false; bool shortwin = false;
bool softening = true;
int crispness = -1; int crispness = -1;
bool help = false; bool help = false;
bool quiet = false;
bool haveRatio = false;
enum { enum {
NoTransients, NoTransients,
@@ -53,7 +60,6 @@ int main(int argc, char **argv)
float fthresh2 = -1.f; float fthresh2 = -1.f;
while (1) { while (1) {
int thisOptind = optind ? optind : 1;
int optionIndex = 0; int optionIndex = 0;
static struct option longOpts[] = { static struct option longOpts[] = {
@@ -76,18 +82,20 @@ int main(int argc, char **argv)
{ "thresh1", 1, 0, '6' }, { "thresh1", 1, 0, '6' },
{ "thresh2", 1, 0, '7' }, { "thresh2", 1, 0, '7' },
{ "bl-transients", 0, 0, '8' }, { "bl-transients", 0, 0, '8' },
{ "no-softening", 0, 0, '9' },
{ "quiet", 0, 0, 'q' },
{ 0, 0, 0 } { 0, 0, 0 }
}; };
c = getopt_long(argc, argv, "t:p:d:RPc:f:", longOpts, &optionIndex); c = getopt_long(argc, argv, "t:p:d:RPc:f:qh", longOpts, &optionIndex);
if (c == -1) break; if (c == -1) break;
switch (c) { switch (c) {
case 'h': help = true; break; case 'h': help = true; break;
case 't': ratio *= atof(optarg); break; case 't': ratio *= atof(optarg); haveRatio = true; break;
case 'T': { double m = atof(optarg); if (m != 0.0) ratio /= m; } break; case 'T': { double m = atof(optarg); if (m != 0.0) ratio /= m; }; haveRatio = true; break;
case 'p': pitchshift = atof(optarg); break; case 'p': pitchshift = atof(optarg); haveRatio = true; break;
case 'f': frequencyshift = atof(optarg); break; case 'f': frequencyshift = atof(optarg); haveRatio = true; break;
case 'd': debug = atoi(optarg); break; case 'd': debug = atoi(optarg); break;
case 'R': realtime = true; break; case 'R': realtime = true; break;
case 'P': precise = true; break; case 'P': precise = true; break;
@@ -100,20 +108,22 @@ int main(int argc, char **argv)
case '6': fthresh1 = atof(optarg); break; case '6': fthresh1 = atof(optarg); break;
case '7': fthresh2 = atof(optarg); break; case '7': fthresh2 = atof(optarg); break;
case '8': transients = BandLimitedTransients; break; case '8': transients = BandLimitedTransients; break;
case '9': softening = false; break;
case 'c': crispness = atoi(optarg); break; case 'c': crispness = atoi(optarg); break;
default: break; case 'q': quiet = true; break;
default: help = true; break;
} }
} }
if (help || optind + 2 != argc) { if (help || !haveRatio || optind + 2 != argc) {
cerr << endl; cerr << endl;
cerr << "Rubber Band" << endl; cerr << "Rubber Band" << endl;
cerr << "An audio time-stretching and pitch-shifting library and utility program." << endl; cerr << "An audio time-stretching and pitch-shifting library and utility program." << endl;
cerr << "Copyright 2007 Chris Cannam. Distributed under the GNU General Public License." << endl; cerr << "Copyright 2007 Chris Cannam. Distributed under the GNU General Public License." << endl;
cerr << endl; cerr << endl;
cerr << "Usage: " << argv[0] << " [options] <infile.wav> <outfile.wav>" << endl; cerr << " Usage: " << argv[0] << " [options] <infile.wav> <outfile.wav>" << endl;
cerr << endl; cerr << endl;
cerr << "where options may be:" << endl; cerr << "You must specify at least one of the following time and pitch ratio options." << endl;
cerr << endl; cerr << endl;
cerr << " -t<X>, --time <X> Stretch to X times original duration, or" << endl; cerr << " -t<X>, --time <X> Stretch to X times original duration, or" << endl;
cerr << " -T<X>, --tempo <X> Change tempo by multiple X (equivalent to --time 1/X)" << endl; cerr << " -T<X>, --tempo <X> Change tempo by multiple X (equivalent to --time 1/X)" << endl;
@@ -121,9 +131,12 @@ int main(int argc, char **argv)
cerr << " -p<X>, --pitch <X> Raise pitch by X semitones, or" << endl; cerr << " -p<X>, --pitch <X> Raise pitch by X semitones, or" << endl;
cerr << " -f<X>, --frequency <X> Change frequency by multiple X" << endl; cerr << " -f<X>, --frequency <X> Change frequency by multiple X" << endl;
cerr << endl; cerr << endl;
cerr << " -c<N>, --crisp <N> Crispness (N = 0,1,2,3); default 2 (see below)" << endl; cerr << "The following option provides a simple way to adjust the sound. See below" << endl;
cerr << "for more details." << endl;
cerr << endl; cerr << endl;
cerr << "The following options adjust the processing mode and stretch algorithm." << endl; cerr << " -c<N>, --crisp <N> Crispness (N = 0,1,2,3,4,5); default 4 (see below)" << endl;
cerr << endl;
cerr << "The remaining options fine-tune the processing mode and stretch algorithm." << endl;
cerr << "These are mostly included for test purposes; the default settings and standard" << endl; cerr << "These are mostly included for test purposes; the default settings and standard" << endl;
cerr << "crispness parameter are intended to provide the best sounding set of options" << endl; cerr << "crispness parameter are intended to provide the best sounding set of options" << endl;
cerr << "for most situations." << endl; cerr << "for most situations." << endl;
@@ -132,31 +145,38 @@ int main(int argc, char **argv)
cerr << " -R, --realtime Select realtime mode (implies -P --no-threads)" << endl; cerr << " -R, --realtime Select realtime mode (implies -P --no-threads)" << endl;
cerr << " --no-threads No extra threads regardless of cpus/channel count" << endl; cerr << " --no-threads No extra threads regardless of cpus/channel count" << endl;
cerr << " --no-transients Disable phase resynchronisation at transients" << endl; cerr << " --no-transients Disable phase resynchronisation at transients" << endl;
cerr << " --bl-transients Band-limit phase resync to extreme frequencies" << endl;
cerr << " --no-peaklock Disable phase locking to peak frequencies" << endl; cerr << " --no-peaklock Disable phase locking to peak frequencies" << endl;
cerr << " --no-softening Disable large-ratio softening of phase locking" << endl;
cerr << " --window-long Use longer processing window (actual size may vary)" << endl; cerr << " --window-long Use longer processing window (actual size may vary)" << endl;
cerr << " --window-short Use shorter processing window" << endl; cerr << " --window-short Use shorter processing window" << endl;
cerr << " --thresh<N> <F> Set internal freq threshold N (N = 0,1,2) to F Hz" << endl; cerr << " --thresh<N> <F> Set internal freq threshold N (N = 0,1,2) to F Hz" << endl;
cerr << endl; cerr << endl;
cerr << " -d<N>, --debug <N> Select debug level (N = 0,1,2,3); default 1, full 3" << std::endl; cerr << " -d<N>, --debug <N> Select debug level (N = 0,1,2,3); default 0, full 3" << endl;
cerr << " (N.B. debug level 3 includes audible ticks in output)" << endl; cerr << " (N.B. debug level 3 includes audible ticks in output)" << endl;
cerr << " -q, --quiet Suppress progress output" << endl;
cerr << endl; cerr << endl;
cerr << " -h, --help Show this help" << endl; cerr << " -h, --help Show this help" << endl;
cerr << endl; cerr << endl;
cerr << "\"Crispness\" levels:" << endl; cerr << "\"Crispness\" levels:" << endl;
cerr << " -c 0 equivalent to --no-transients --no-peaklock" << endl; cerr << " -c 0 equivalent to --no-transients --no-peaklock --window-long" << endl;
cerr << " -c 1 equivalent to --no-peaklock" << endl; cerr << " -c 1 equivalent to --no-transients --no-peaklock" << endl;
cerr << " -c 2 default processing options" << endl; cerr << " -c 2 equivalent to --no-transients" << endl;
cerr << " -c 3 equivalent to --no-peaklock --window-short (may be suitable for drums)" << endl; cerr << " -c 3 equivalent to --bl-transients" << endl;
cerr << " -c 4 default processing options" << endl;
cerr << " -c 5 equivalent to --no-peaklock --window-short (may be suitable for drums)" << endl;
cerr << endl; cerr << endl;
return 2; return 2;
} }
switch (crispness) { switch (crispness) {
case -1: crispness = 2; break; case -1: crispness = 4; break;
case 0: transients = NoTransients; peaklock = false; longwin = false; shortwin = false; break; case 0: transients = NoTransients; peaklock = false; longwin = true; shortwin = false; break;
case 1: transients = Transients; peaklock = false; longwin = false; shortwin = false; break; case 1: transients = NoTransients; peaklock = false; longwin = false; shortwin = false; break;
case 2: transients = Transients; peaklock = true; longwin = false; shortwin = false; break; case 2: transients = NoTransients; peaklock = true; longwin = false; shortwin = false; break;
case 3: transients = Transients; peaklock = false; longwin = false; shortwin = true; break; case 3: transients = BandLimitedTransients; peaklock = true; longwin = false; shortwin = false; break;
case 4: transients = Transients; peaklock = true; longwin = false; shortwin = false; break;
case 5: transients = Transients; peaklock = false; longwin = false; shortwin = true; break;
}; };
char *fileName = strdup(argv[optind++]); char *fileName = strdup(argv[optind++]);
@@ -195,8 +215,8 @@ int main(int argc, char **argv)
RubberBandStretcher::Options options = 0; RubberBandStretcher::Options options = 0;
if (realtime) options |= RubberBandStretcher::OptionProcessRealTime; if (realtime) options |= RubberBandStretcher::OptionProcessRealTime;
if (precise) options |= RubberBandStretcher::OptionStretchPrecise; if (precise) options |= RubberBandStretcher::OptionStretchPrecise;
// if (!transients) options |= RubberBandStretcher::OptionTransientsSmooth;
if (!peaklock) options |= RubberBandStretcher::OptionPhaseIndependent; if (!peaklock) options |= RubberBandStretcher::OptionPhaseIndependent;
if (!softening) options |= RubberBandStretcher::OptionPhasePeakLocked;
if (!threaded) options |= RubberBandStretcher::OptionThreadingNone; if (!threaded) options |= RubberBandStretcher::OptionThreadingNone;
if (longwin) options |= RubberBandStretcher::OptionWindowLong; if (longwin) options |= RubberBandStretcher::OptionWindowLong;
if (shortwin) options |= RubberBandStretcher::OptionWindowShort; if (shortwin) options |= RubberBandStretcher::OptionWindowShort;
@@ -217,16 +237,13 @@ int main(int argc, char **argv)
frequencyshift *= pow(2.0, pitchshift / 12); frequencyshift *= pow(2.0, pitchshift / 12);
} }
RubberBandStretcher::setDefaultDebugLevel(debug);
RubberBandStretcher ts(sfinfo.samplerate, channels, options, RubberBandStretcher ts(sfinfo.samplerate, channels, options,
ratio, frequencyshift); ratio, frequencyshift);
ts.setDebugLevel(debug);
ts.setExpectedInputDuration(sfinfo.frames); ts.setExpectedInputDuration(sfinfo.frames);
// ts.setTimeRatio(ratio);
// ts.setPitchScale(pitchshift);
float *fbuf = new float[channels * ibs]; float *fbuf = new float[channels * ibs];
float **ibuf = new float *[channels]; float **ibuf = new float *[channels];
for (size_t i = 0; i < channels; ++i) ibuf[i] = new float[ibs]; for (size_t i = 0; i < channels; ++i) ibuf[i] = new float[ibs];
@@ -239,12 +256,12 @@ int main(int argc, char **argv)
if (!realtime) { if (!realtime) {
cerr << "First pass (studying)..." << endl; if (!quiet) {
cerr << "Pass 1: Studying..." << endl;
}
while (frame < sfinfo.frames) { while (frame < sfinfo.frames) {
// std::cout << "study frame " << frame << std::endl;
int count = -1; int count = -1;
if (sf_seek(sndfile, frame, SEEK_SET) < 0) break; if (sf_seek(sndfile, frame, SEEK_SET) < 0) break;
@@ -264,15 +281,17 @@ int main(int argc, char **argv)
int p = int((double(frame) * 100.0) / sfinfo.frames); int p = int((double(frame) * 100.0) / sfinfo.frames);
if (p > percent || frame == 0) { if (p > percent || frame == 0) {
percent = p; percent = p;
if (!quiet) {
cerr << "\r" << percent << "% "; cerr << "\r" << percent << "% ";
} }
}
frame += ibs; frame += ibs;
} }
cerr << endl; if (!quiet) {
cerr << "\rCalculating profile..." << endl;
cerr << "Second pass (processing)..." << endl; }
} }
frame = 0; frame = 0;
@@ -306,13 +325,8 @@ int main(int argc, char **argv)
ts.process(ibuf, count, final); ts.process(ibuf, count, final);
// if
// std::cerr << frame << " + " << ibs << " >= " << sfinfo.frames << ": calling ts.complete()!" << std::endl;
// ts.complete();
// }
int avail = ts.available(); int avail = ts.available();
if (debug > 1) std::cerr << "available = " << avail << std::endl; if (debug > 1) cerr << "available = " << avail << endl;
if (avail > 0) { if (avail > 0) {
float **obf = new float *[channels]; float **obf = new float *[channels];
@@ -323,7 +337,7 @@ int main(int argc, char **argv)
countOut += avail; countOut += avail;
float *fobf = new float[channels * avail]; float *fobf = new float[channels * avail];
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t i = 0; i < avail; ++i) { for (int i = 0; i < avail; ++i) {
float value = obf[c][i]; float value = obf[c][i];
if (fabsf(value) > outpeak) outpeak = fabsf(value); if (fabsf(value) > outpeak) outpeak = fabsf(value);
outsum += value * value; outsum += value * value;
@@ -333,13 +347,13 @@ int main(int argc, char **argv)
fobf[i * channels + c] = value; fobf[i * channels + c] = value;
} }
} }
// std::cout << "fobf mean: "; // cout << "fobf mean: ";
// double d = 0; // double d = 0;
// for (int i = 0; i < avail; ++i) { // for (int i = 0; i < avail; ++i) {
// d += fobf[i]; // d += fobf[i];
// } // }
// d /= avail; // d /= avail;
// std::cout << d << std::endl; // cout << d << endl;
sf_writef_float(sndfileOut, fobf, avail); sf_writef_float(sndfileOut, fobf, avail);
delete[] fobf; delete[] fobf;
for (size_t i = 0; i < channels; ++i) { for (size_t i = 0; i < channels; ++i) {
@@ -348,20 +362,31 @@ int main(int argc, char **argv)
delete[] obf; delete[] obf;
} }
if (frame == 0 && !realtime && !quiet) {
cerr << "Pass 2: Processing..." << endl;
}
int p = int((double(frame) * 100.0) / sfinfo.frames); int p = int((double(frame) * 100.0) / sfinfo.frames);
if (p > percent || frame == 0) { if (p > percent || frame == 0) {
percent = p; percent = p;
if (!quiet) {
cerr << "\r" << percent << "% "; cerr << "\r" << percent << "% ";
} }
}
frame += ibs; frame += ibs;
} }
if (!quiet) {
cerr << "\r " << endl;
}
int avail; int avail;
while ((avail = ts.available()) >= 0) { while ((avail = ts.available()) >= 0) {
if (debug > 1) std::cerr << "(completing) available = " << avail << std::endl; if (debug > 1) {
cerr << "(completing) available = " << avail << endl;
}
if (avail > 0) { if (avail > 0) {
float **obf = new float *[channels]; float **obf = new float *[channels];
@@ -372,7 +397,7 @@ int main(int argc, char **argv)
countOut += avail; countOut += avail;
float *fobf = new float[channels * avail]; float *fobf = new float[channels * avail];
for (size_t c = 0; c < channels; ++c) { for (size_t c = 0; c < channels; ++c) {
for (size_t i = 0; i < avail; ++i) { for (int i = 0; i < avail; ++i) {
float value = obf[c][i]; float value = obf[c][i];
if (fabsf(value) > outpeak) outpeak = fabsf(value); if (fabsf(value) > outpeak) outpeak = fabsf(value);
outsum += value * value; outsum += value * value;
@@ -389,6 +414,8 @@ int main(int argc, char **argv)
delete[] obf[i]; delete[] obf[i];
} }
delete[] obf; delete[] obf;
} else {
usleep(10000);
} }
} }
@@ -398,7 +425,9 @@ int main(int argc, char **argv)
double inmean = sqrt(insum / (sfinfo.frames * sfinfo.channels)); double inmean = sqrt(insum / (sfinfo.frames * sfinfo.channels));
double outmean = sqrt(outsum / (countOut * sfinfo.channels)); double outmean = sqrt(outsum / (countOut * sfinfo.channels));
cerr << endl << "in: " << countIn << ", out: " << countOut << ", ratio: " << float(countOut)/float(countIn) << ", ideal output: " << lrint(countIn * ratio) << ", diff: " << abs(lrint(countIn * ratio) - int(countOut)) << endl; if (!quiet) {
cerr << "in: " << countIn << ", out: " << countOut << ", ratio: " << float(countOut)/float(countIn) << ", ideal output: " << lrint(countIn * ratio) << ", error: " << abs(lrint(countIn * ratio) - int(countOut)) << endl;
cerr << "input peak: " << inpeak << "; output peak " << outpeak << "; gain " << (inpeak > 0 ? outpeak/inpeak : 1) << endl; cerr << "input peak: " << inpeak << "; output peak " << outpeak << "; gain " << (inpeak > 0 ? outpeak/inpeak : 1) << endl;
cerr << "input rms: " << inmean << "; output rms " << outmean << "; gain " << (inmean > 0 ? outmean/inmean : 1) << endl; cerr << "input rms: " << inmean << "; output rms " << outmean << "; gain " << (inmean > 0 ? outmean/inmean : 1) << endl;
@@ -406,9 +435,6 @@ int main(int argc, char **argv)
struct timeval etv; struct timeval etv;
(void)gettimeofday(&etv, 0); (void)gettimeofday(&etv, 0);
cerr << "\nstart: " << tv.tv_sec << ":" << tv.tv_usec << endl;
cerr << "finish: " << etv.tv_sec << ":" << etv.tv_usec << endl;
etv.tv_sec -= tv.tv_sec; etv.tv_sec -= tv.tv_sec;
if (etv.tv_usec < tv.tv_usec) { if (etv.tv_usec < tv.tv_usec) {
etv.tv_usec += 1000000; etv.tv_usec += 1000000;
@@ -416,10 +442,9 @@ int main(int argc, char **argv)
} }
etv.tv_usec -= tv.tv_usec; etv.tv_usec -= tv.tv_usec;
cerr << "elapsed: " << etv.tv_sec << ":" << etv.tv_usec << endl;
double sec = double(etv.tv_sec) + (double(etv.tv_usec) / 1000000.0); double sec = double(etv.tv_sec) + (double(etv.tv_usec) / 1000000.0);
cerr << "\nin/sec: " << countIn/sec << ", out/sec: " << countOut/sec << endl; cerr << "\nelapsed time: " << sec << " sec, in frames/sec: " << countIn/sec << ", out frames/sec: " << countOut/sec << endl;
}
return 0; return 0;
} }