* Improvements to offline phase reset point detection

* Better support for higher sample rates
* Save and restore FFTW wisdom (needs work still)
* More tidying, options and argument overhauls
This commit is contained in:
Chris Cannam
2007-11-20 20:17:13 +00:00
parent e9cb6dbc37
commit 7c4fcd85da
11 changed files with 408 additions and 322 deletions

View File

@@ -34,12 +34,13 @@ public:
static const int OptionStretchElastic = 0x00000000;
static const int OptionStretchPrecise = 0x00000010;
static const int OptionTransientsMixed = 0x00000000;
static const int OptionTransientsSmooth = 0x00000100;
static const int OptionTransientsCrisp = 0x00000200;
static const int OptionTransientsCrisp = 0x00000000;
static const int OptionTransientsMixed = 0x00000100;
static const int OptionTransientsSmooth = 0x00000200;
static const int OptionPhasePeakLocked = 0x00000000;
static const int OptionPhaseIndependent = 0x00001000;
static const int OptionPhaseAdaptive = 0x00000000;
static const int OptionPhasePeakLocked = 0x00001000;
static const int OptionPhaseIndependent = 0x00002000;
static const int OptionThreadingAuto = 0x00000000;
static const int OptionThreadingNone = 0x00010000;
@@ -98,6 +99,8 @@ public:
virtual void setDebugLevel(int level);
static void setDefaultDebugLevel(int level);
protected:
class Impl;
Impl *m_d;

View File

@@ -20,6 +20,7 @@
#include <cmath>
#include <iostream>
#include <map>
#include <cstdio>
#include <vector>
class FFTImpl
@@ -56,12 +57,17 @@ public:
~D_FFTW() {
if (m_fplanf) {
//!!! shouldn't do this every time, but only when the last one
// is destroyed (likewise shouldn't load every time) -- want
// a static refcount + mutex
saveWisdom('f');
fftwf_destroy_plan(m_fplanf);
fftwf_destroy_plan(m_fplani);
fftwf_free(m_fbuf);
fftwf_free(m_fpacked);
}
if (m_dplanf) {
saveWisdom('d');
fftw_destroy_plan(m_dplanf);
fftw_destroy_plan(m_dplani);
fftw_free(m_dbuf);
@@ -73,6 +79,7 @@ public:
void initFloat() {
if (m_fplanf) return;
loadWisdom('f');
m_fbuf = (float *)fftw_malloc(m_size * sizeof(float));
m_fpacked = (fftwf_complex *)fftw_malloc
((m_size/2 + 1) * sizeof(fftwf_complex));
@@ -84,6 +91,7 @@ public:
void initDouble() {
if (m_dplanf) return;
loadWisdom('d');
m_dbuf = (double *)fftw_malloc(m_size * sizeof(double));
m_dpacked = (fftw_complex *)fftw_malloc
((m_size/2 + 1) * sizeof(fftw_complex));
@@ -93,6 +101,35 @@ public:
(m_size, m_dpacked, m_dbuf, FFTW_MEASURE);
}
// Import previously saved FFTW wisdom for the given precision type
// ('f' or 'd') by calling wisdom() in load mode.
void loadWisdom(char type) { wisdom(false, type); }
// Export the current FFTW wisdom for the given precision type
// ('f' or 'd') by calling wisdom() in save mode.
void saveWisdom(char type) { wisdom(true, type); }
// Read or write FFTW planning "wisdom" for one floating-point precision.
//
// save: true to export the current wisdom to the file, false to import
//       wisdom from it.
// type: 'f' selects the single-precision (fftwf_*) calls, 'd' the
//       double-precision (fftw_*) calls; any other value is ignored.
//
// The file used is $HOME/.rubberband.wisdom.<type>.  This is strictly
// best-effort: if HOME is unset, the path does not fit in the local
// buffer, or the file cannot be opened, the function returns without
// doing anything and without reporting an error.
void wisdom(bool save, char type) {

    // Reject unknown types before touching the filesystem, so that a
    // bad type cannot create or truncate a file when saving.
    if (type != 'f' && type != 'd') return;

    const char *home = getenv("HOME");
    if (!home) return;

    char fn[256];
    int len = snprintf(fn, sizeof(fn), "%s/%s.%c",
                       home, ".rubberband.wisdom", type);

    // snprintf returns the length it needed; a negative or too-large
    // value means the name is unusable or was truncated, and a
    // truncated name would refer to the wrong file.
    if (len < 0 || size_t(len) >= sizeof(fn)) return;

    FILE *f = fopen(fn, save ? "wb" : "rb");
    if (!f) return;

    if (save) {
        switch (type) {
        case 'f': fftwf_export_wisdom_to_file(f); break;
        case 'd': fftw_export_wisdom_to_file(f); break;
        }
    } else {
        switch (type) {
        case 'f': fftwf_import_wisdom_from_file(f); break;
        case 'd': fftw_import_wisdom_from_file(f); break;
        }
    }

    fclose(f);
}
void packFloat(float *re, float *im) {
for (unsigned int i = 0; i <= m_size/2; ++i) {
m_fpacked[i][0] = re[i];
@@ -508,14 +545,14 @@ FFT::FFT(unsigned int size)
break;
case 1:
std::cerr << "FFT::FFT(" << size << "): using FFTW3 implementation"
<< std::endl;
// std::cerr << "FFT::FFT(" << size << "): using FFTW3 implementation"
// << std::endl;
d = new D_FFTW(size);
break;
default:
std::cerr << "FFT::FFT(" << size << "): using built-in implementation"
<< std::endl;
// std::cerr << "FFT::FFT(" << size << "): using built-in implementation"
// << std::endl;
d = new D_Cross(size);
break;
}

View File

@@ -52,8 +52,8 @@ Resampler::D::D(Quality quality, size_t channels, size_t maxBufferSize) :
m_iinsize(0),
m_ioutsize(0)
{
std::cerr << "Resampler::Resampler: using libsamplerate implementation"
<< std::endl;
// std::cerr << "Resampler::Resampler: using libsamplerate implementation"
// << std::endl;
int err = 0;
m_src = src_new(quality == Best ? SRC_SINC_BEST_QUALITY :
@@ -122,7 +122,7 @@ Resampler::D::resample(float **in, float **out, size_t incount, float ratio,
//!!! check err, respond appropriately
if (m_channels > 1) {
for (size_t i = 0; i < data.output_frames_gen; ++i) {
for (int i = 0; i < data.output_frames_gen; ++i) {
for (size_t c = 0; c < m_channels; ++c) {
out[c][i] = m_iout[i * m_channels + c];
}

View File

@@ -179,6 +179,11 @@ RubberBandStretcher::setDebugLevel(int level)
m_d->setDebugLevel(level);
}
// Static entry point: forwards the requested default debug level to
// the implementation class via Impl::setDefaultDebugLevel().
void
RubberBandStretcher::setDefaultDebugLevel(int level)
{
Impl::setDefaultDebugLevel(level);
}
}

View File

@@ -32,10 +32,10 @@ StretchCalculator::StretchCalculator(size_t sampleRate,
m_divergence(0),
m_recovery(0),
m_prevRatio(1.0),
m_wasTransient(false),
m_transientAmnesty(0),
m_useHardPeaks(useHardPeaks)
{
std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
// std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
}
StretchCalculator::~StretchCalculator()
@@ -47,38 +47,6 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
const std::vector<float> &phaseResetDf,
const std::vector<float> &stretchDf)
{
// Method:
//!!! This description is out of date.
// 1. Pre-process the df array, and for each (say) one second's
// worth of values, calculate the number of peaks that would
// qualify for phase reset given the default threshold. Then
// reduce or increase the threshold by stages until that number is
// in a sensible range (say 1-10 peaks per second -- the low end
// is harder to estimate than the high end, so it may be better to
// start with a high sensitivity and reduce it).
// 2. Record the positions of peaks, and separately the positions
// of those peaks that qualify for reset using the sliding
// threshold window. Don't permit two locked peaks within a very
// short time frame (e.g. 30-50ms).
// 3. Map each of the locked peaks (or any peaks that are over a
// given intensity?), as well as the start and end points, to a
// proportionate position in the newly stretched array so as to
// ensure that their timing is strictly "correct".
// 4. Calculate how much time is left in the stretch total, after
// each of the locked chunks has been allocated its static
// allowance. Also count the non-locked chunks.
// 5. For each region between two locked chunks, calculate the
// number of samples to allocate that region given the time
// available for stretch and the number of non-locked chunks.
// Then divvy them up... how exactly?
assert(phaseResetDf.size() == stretchDf.size());
m_lastPeaks = findPeaks(phaseResetDf);
@@ -89,13 +57,10 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
size_t outputDuration = lrint(inputDuration * ratio);
std::cerr << "debug level: " << m_debugLevel << std::endl;
if (m_debugLevel > 0) {
std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration;
}
//!!! round down?
outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio);
if (m_debugLevel > 0) {
@@ -103,41 +68,32 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
std::cerr << ", df size " << phaseResetDf.size() << std::endl;
}
// size_t stretchable = outputDuration - lockCount * m_increment;
std::vector<size_t> fixedAudioChunks;
for (size_t i = 0; i < peaks.size(); ++i) {
fixedAudioChunks.push_back
//!!! this should be rounding down, shouldn't it? not lrint?
(lrint((double(peaks[i].chunk) * outputDuration) / totalCount));
}
// size_t lockIndex = 0;
if (m_debugLevel > 1) {
std::cerr << "have " << peaks.size() << " fixed positions" << std::endl;
}
size_t totalInput = 0, totalOutput = 0;
// so for each inter-lock region, we want to take the number of
// output chunks to be allocated and the detection function values
// within the range, and produce a series of increments that sum
// to the number of output chunks, such that each increment is
// displaced from the input increment by an amount inversely
// proportional to the magnitude of the detection function at that
// input step. Ideally the detection function would have been
// somewhat smoothed for this purpose but we'll start raw.
//!!! Actually, we would possibly be better off using a fixed
// smooth curve than the detection function itself.
// For each region between two consecutive time sync points, we
// want to take the number of output chunks to be allocated and
// the detection function values within the range, and produce a
// series of increments that sum to the number of output chunks,
// such that each increment is displaced from the input increment
// by an amount inversely proportional to the magnitude of the
// stretch detection function at that input step.
size_t regionTotalChunks = 0;
for (size_t i = 0; i <= peaks.size(); ++i) {
size_t regionStart, regionStartChunk, regionEnd, regionEndChunk;
bool phaseLock = false;
bool phaseReset = false;
if (i == 0) {
regionStartChunk = 0;
@@ -145,7 +101,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
} else {
regionStartChunk = peaks[i-1].chunk;
regionStart = fixedAudioChunks[i-1];
phaseLock = peaks[i-1].hard;
phaseReset = peaks[i-1].hard;
}
if (i == peaks.size()) {
@@ -172,7 +128,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
dfRegion = smoothDF(dfRegion);
std::vector<int> regionIncrements = distributeRegion
(dfRegion, regionDuration, ratio, phaseLock);
(dfRegion, regionDuration, ratio, phaseReset);
size_t totalForRegion = 0;
@@ -180,7 +136,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
int incr = regionIncrements[j];
if (j == 0 && phaseLock) increments.push_back(-incr);
if (j == 0 && phaseReset) increments.push_back(-incr);
else increments.push_back(incr);
if (incr > 0) totalForRegion += incr;
@@ -200,6 +156,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << ceil(totalInput * ratio) << std::endl;
std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl;
}
return increments;
}
@@ -210,13 +167,15 @@ StretchCalculator::calculateSingle(double ratio,
{
bool isTransient = false;
//!!! We want to ensure, as close as possible, that the lock
// points appear at _exactly_ the right audio frame numbers
// We want to ensure, as close as possible, that the phase reset
// points appear at _exactly_ the right audio frame numbers.
// In principle, the threshold depends on chunk size: larger chunk
// sizes need higher thresholds. Since chunk size depends on
// ratio, I suppose we could in theory calculate the threshold
// from the ratio directly. For the moment we're happy if it
// works well in common situations.
//!!! depends on chunk size. larger chunk sizes need higher
//thresholds. since chunk size depends on ratio, I suppose we
//could in theory calculate the threshold from the ratio directly.
//For now we just frig it to work OK for a couple of common cases
float transientThreshold = 0.35;
if (ratio > 1) transientThreshold = 0.25;
@@ -231,13 +190,17 @@ StretchCalculator::calculateSingle(double ratio,
m_prevDf = df;
if (isTransient && !m_wasTransient) {
if (isTransient && m_transientAmnesty == 0) {
if (m_debugLevel > 1) {
std::cerr << "StretchCalculator::calculateSingle: transient found at "
<< inputDurationSoFar << std::endl;
}
m_divergence += m_increment - (m_increment * ratio);
m_wasTransient = true;
// as in offline mode, 0.05 sec approx min between transients
m_transientAmnesty =
lrint(ceil(double(m_sampleRate) / (20 * double(m_increment))));
m_recovery = m_divergence / ((m_sampleRate / 10.0) / m_increment);
return -m_increment;
}
@@ -247,17 +210,16 @@ StretchCalculator::calculateSingle(double ratio,
m_prevRatio = ratio;
}
//!!! want transient amnesty as above (hard peak amnesty)
m_wasTransient = false;
if (m_transientAmnesty > 0) --m_transientAmnesty;
int incr = lrint(m_increment * ratio - m_recovery);
if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", ";
}
if (incr < (m_increment * ratio) / 2) {
incr = (m_increment * ratio) / 2;
} else if (incr > m_increment * ratio * 2) {
incr = m_increment * ratio * 2;
if (incr < lrint((m_increment * ratio) / 2)) {
incr = lrint((m_increment * ratio) / 2);
} else if (incr > lrint(m_increment * ratio * 2)) {
incr = lrint(m_increment * ratio * 2);
}
double divdiff = (m_increment * ratio) - incr;
@@ -288,58 +250,112 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
{
std::vector<float> df = smoothDF(rawDf);
// We distinguish between "soft" and "hard" peaks. A soft peak is
// simply the result of peak-picking on the smoothed onset
// detection function, and it represents any (strong-ish) onset.
// We aim to ensure always that soft peaks are placed at the
// correct position in time. A hard peak is where there is a very
// rapid rise in detection function, and it presumably represents
// a more broadband, noisy transient. For these we perform a
// phase reset (if in the appropriate mode), and we locate the
// reset at the first point where we notice enough of a rapid
// rise, rather than necessarily at the peak itself, in order to
// preserve the shape of the transient.
std::set<size_t> hardPeakCandidates;
std::set<size_t> softPeakCandidates;
if (m_useHardPeaks) {
//!!! this should depend on duration based on output increment surely?
// 0.05 sec approx min between hard peaks
size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) /
(20 * double(m_increment)))); // 0.05 sec ish
// size_t hardPeakAmnesty = 5;
(20 * double(m_increment))));
size_t prevHardPeak = 0;
std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
if (m_debugLevel > 1) {
std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
}
for (size_t i = 1; i + 1 < df.size(); ++i) {
//!!! this ratio configurable? dependent on chunk size and sr?
if (df[i] < 0.1) continue;
if (df[i] <= df[i-1] * 1.2) continue;
if (df[i] <= df[i-1] * 1.1) continue;
if (df[i] < 0.22) continue;
if (df[i] > df[i-1] * 1.4 ||
(df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
df[i] > 0.4) {
if (!hardPeakCandidates.empty() &&
i < prevHardPeak + hardPeakAmnesty) {
continue;
}
size_t peakLocation = i;
if (i + 1 < rawDf.size() &&
rawDf[i + 1] > rawDf[i] * 1.4) {
++peakLocation;
}
if (m_debugLevel > 1) {
std::cerr << "hard peak at " << peakLocation << " (" << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << ")" << std::endl;
}
hardPeakCandidates.insert(peakLocation);
prevHardPeak = peakLocation;
if (!hardPeakCandidates.empty() &&
i < prevHardPeak + hardPeakAmnesty) {
continue;
}
bool hard = (df[i] > 0.4);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > absolute " << 0.4
<< std::endl;
}
if (!hard) {
hard = (df[i] > df[i-1] * 1.4);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.4"
<< std::endl;
}
}
if (!hard && i > 1) {
hard = (df[i] > df[i-1] * 1.2 &&
df[i-1] > df[i-2] * 1.2);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.2 and "
<< df[i-1] << " > prev " << df[i-2] << " * 1.2"
<< std::endl;
}
}
if (!hard && i > 2) {
// have already established that df[i] > df[i-1] * 1.1
hard = (df[i] > 0.3 &&
df[i-1] > df[i-2] * 1.1 &&
df[i-2] > df[i-3] * 1.1);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.1 and "
<< df[i-1] << " > prev " << df[i-2] << " * 1.1 and "
<< df[i-2] << " > prev " << df[i-3] << " * 1.1"
<< std::endl;
}
}
if (!hard) continue;
// (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
// df[i] > 0.4) {
size_t peakLocation = i;
if (i + 1 < rawDf.size() &&
rawDf[i + 1] > rawDf[i] * 1.4) {
++peakLocation;
if (m_debugLevel > 1) {
std::cerr << "pushing hard peak forward to " << peakLocation << ": " << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << std::endl;
}
}
hardPeakCandidates.insert(peakLocation);
prevHardPeak = peakLocation;
}
}
//!!! we don't yet do the right thing with soft peaks. if
//!useHardPeaks, we should be resetting on soft peaks; if
//useHardPeaks, we should be ignoring soft peaks if they occur
//shortly after hard ones, otherwise either resetting on them, or
//at least making sure they fall at the correct sample time
// int mediansize = lrint(ceil(double(m_sampleRate) /
// (4 * double(m_increment)))); // 0.25 sec ish
size_t medianmaxsize = lrint(ceil(double(m_sampleRate) /
double(m_increment))); // 1 sec ish
// int mediansize = lrint(ceil(double(m_sampleRate) /
// (2 * double(m_increment)))); // 0.5 sec ish
if (m_debugLevel > 1) {
std::cerr << "mediansize = " << medianmaxsize << std::endl;
@@ -382,16 +398,13 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
if (mediansize < 2) {
if (mediansize > medianmaxsize) { // absurd, but never mind that
// std::cerr << "(<2) pop front ";
medianwin.pop_front();
}
if (nextDf < df.size()) {
// std::cerr << "(<2) push back " << df[nextDf] << " ";
medianwin.push_back(df[nextDf]);
} else {
medianwin.push_back(0);
}
// std::cerr << "(<2) continue" << std::endl;
continue;
}
@@ -411,16 +424,16 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
if (index == sorted.size()-1 && index > 0) --index;
float thresh = sorted[index];
if (m_debugLevel > 2) {
// if (m_debugLevel > 2) {
// std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl;
if (medianwin[middle] == 0.f) {
// if (medianwin[middle] == 0.f) {
// std::cerr << "contents: ";
for (size_t j = 0; j < medianwin.size(); ++j) {
// for (size_t j = 0; j < medianwin.size(); ++j) {
// std::cerr << medianwin[j] << " ";
}
// }
// std::cerr << std::endl;
}
}
// }
// }
if (medianwin[middle] > thresh &&
medianwin[middle] > medianwin[middle-1] &&
@@ -439,31 +452,21 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
}
}
//!!! we should distinguish between soft peaks (any found
//using the above method) and hard peaks, which also show
//a very rapid rise in detection function prior to the
//peak (the first value after the rise is not necessarily
//the peak itself, but it is probably where we should
//locate the phase reset). For hard peaks we need to
//reset in time to preserve the shape of the transient
//(unless some option is set to soft mode), for soft peaks
//we just want to avoid poor timing positioning so we
//build up to the reset at the exact peak moment.
// size_t peak = i + maxindex - mediansize;
size_t peak = i + maxindex - middle;
// std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl;
// if (peak > 0) --peak; //!!! that's a fudge
if (softPeakCandidates.empty() || lastSoftPeak != peak) {
if (m_debugLevel > 1) {
std::cerr << "soft peak at " << peak << " (" << peak * m_increment << "): "
<< medianwin[middle] << " > " << thresh << " and "
<< medianwin[middle] << " > " << medianwin[middle-1] << " and "
<< medianwin[middle] << " > " << medianwin[middle+1]
std::cerr << "soft peak at " << peak << " ("
<< peak * m_increment << "): "
<< medianwin[middle] << " > "
<< thresh << " and "
<< medianwin[middle]
<< " > " << medianwin[middle-1] << " and "
<< medianwin[middle]
<< " > " << medianwin[middle+1]
<< std::endl;
}
@@ -484,56 +487,65 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
} else if (softPeakAmnesty > 0) --softPeakAmnesty;
// std::cerr << "i = " << i << " ";
if (mediansize >= medianmaxsize) {
// std::cerr << "(>= " << medianmaxsize << ") pop front ";
medianwin.pop_front();
}
if (nextDf < df.size()) {
// std::cerr << "(" << nextDf << "<" << df.size() << ") push back " << df[nextDf] << " ";
medianwin.push_back(df[nextDf]);
} else {
medianwin.push_back(0);
}
// std::cerr << "continue" << std::endl;
}
std::vector<Peak> peaks;
//!!!
// if (!softPeakCandidates.empty()) {
// std::cerr << "clearing " << softPeakCandidates.size() << " soft peak candidates" << std::endl;
// }
// softPeakCandidates.clear();
while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) {
bool haveHardPeak = !hardPeakCandidates.empty();
bool haveSoftPeak = !softPeakCandidates.empty();
size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0);
size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0);
Peak peak;
peak.hard = false;
peak.chunk = softPeak;
bool ignore = false;
if (haveHardPeak &&
(!haveSoftPeak || hardPeak <= softPeak)) {
if (m_debugLevel > 2) {
std::cerr << "Hard peak: " << hardPeak << std::endl;
}
peak.hard = true;
peak.chunk = hardPeak;
hardPeakCandidates.erase(hardPeakCandidates.begin());
} else {
if (m_debugLevel > 2) {
std::cerr << "Soft peak: " << softPeak << std::endl;
}
if (!peaks.empty() &&
peaks[peaks.size()-1].hard &&
peaks[peaks.size()-1].chunk + 3 >= softPeak) {
if (m_debugLevel > 2) {
std::cerr << "(ignoring, as we just had a hard peak)"
<< std::endl;
}
ignore = true;
}
}
if (haveSoftPeak && peak.chunk == softPeak) {
softPeakCandidates.erase(softPeakCandidates.begin());
}
peaks.push_back(peak);
if (!ignore) {
peaks.push_back(peak);
}
}
return peaks;
@@ -551,10 +563,6 @@ StretchCalculator::smoothDF(const std::vector<float> &df)
total += df[i]; ++count;
if (i+1 < df.size()) { total += df[i+1]; ++count; }
float mean = total / count;
// if (isnan(mean)) {
// std::cerr << "ERROR: mean at " << i << " (of " << df.size() << ") is NaN: dfs are: "
// << df[i-1] << ", " << df[i] << ", " << df[i+1] << std::endl;
// }
smoothedDF.push_back(mean);
}
@@ -574,9 +582,9 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
// the region, we should set all the values up to that point to
// the same value as the peak.
//!!! this is not subtle enough, especially if the region is long
//-- we want a bound that corresponds to acoustic perception of
//the audible bounce
// (This might not be subtle enough, especially if the region is
// long -- we want a bound that corresponds to acoustic perception
// of the audible bounce.)
for (size_t i = 1; i < df.size()/2; ++i) {
if (df[i] < df[i-1]) {
@@ -600,16 +608,15 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
// tending back towards the maximum df, so that the stretchiness
// reduces at the end of the stretched region.
int reducedRegion = (0.1 * m_sampleRate) / m_increment;
if (reducedRegion > df.size()/5) reducedRegion = df.size()/5;
int reducedRegion = lrint((0.1 * m_sampleRate) / m_increment);
if (reducedRegion > int(df.size()/5)) reducedRegion = df.size()/5;
for (size_t i = 0; i < reducedRegion; ++i) {
for (int i = 0; i < reducedRegion; ++i) {
size_t index = df.size() - reducedRegion + i;
df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion;
}
long toAllot = long(duration) - long(m_increment * df.size());
// bool negative = (toAllot < 0);
if (m_debugLevel > 1) {
std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", toAllot " << toAllot << std::endl;
@@ -617,20 +624,25 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
size_t totalIncrement = 0;
//!!! we need to place limits on the amount of displacement per
//chunk. if ratio < 1, no increment should be larger than
//increment*ratio or smaller than increment*ratio/2; if ratio > 1,
//none should be smaller than increment*ratio or larger than
//increment*ratio*2. We need to enforce this in the assignment of
//displacements to allotments, not by trying to respond if
//something turns out wrong
// We place limits on the amount of displacement per chunk. If
// ratio < 1, no increment should be larger than increment*ratio
// or smaller than increment*ratio/2; if ratio > 1, none should be
// smaller than increment*ratio or larger than increment*ratio*2.
// We need to enforce this in the assignment of displacements to
// allotments, not by trying to respond if something turns out
// wrong.
//!!! ratio is only provided to this function for the purposes of
//establishing this bound to the displacement
// Note that the ratio is only provided to this function for the
// purposes of establishing this bound to the displacement.
// so if maxDisplacement / totalDisplacement > increment * ratio*2 - increment (for ratio > 1)
// or maxDisplacement / totalDisplacement < increment * ratio/2 (for ratio < 1)
// then we need to adjust... what?
// so if
// maxDisplacement / totalDisplacement > increment * ratio*2 - increment
// (for ratio > 1)
// or
// maxDisplacement / totalDisplacement < increment * ratio/2
// (for ratio < 1)
// then we need to adjust and accommodate
bool acceptableSquashRange = false;
@@ -663,7 +675,7 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
int extremeIncrement = m_increment + lrint((toAllot * maxDisplacement) / totalDisplacement);
if (ratio < 1.0) {
if (extremeIncrement > lrint(ceil(m_increment * ratio))) {
std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (I thought this couldn't happen?)" << std::endl;
std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (this should not happen)" << std::endl;
} else if (extremeIncrement < (m_increment * ratio) / 2) {
if (m_debugLevel > 0) {
std::cerr << "WARNING: extreme increment " << extremeIncrement << " < " << (m_increment * ratio) / 2 << std::endl;
@@ -684,19 +696,8 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
if (!acceptableSquashRange) {
// Need to make maxDisplacement smaller as a proportion of
// the total displacement, yet ensure that the
// displacements still sum to the total. How?
// std::cerr << "Adjusting df values by " << maxDf/10 << "..." << std::endl;
// std::cerr << "now: ";
// for (size_t i = 0; i < df.size(); ++i) {
// df[i] += maxDf/10;
// std::cerr << df[i] << " ";
// }
// std::cerr << std::endl;
// displacements still sum to the total.
adj += maxDf/10;
//...
}
}
@@ -729,7 +730,9 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
int increment = m_increment + allotment;
if (increment <= 0) {
//!!! this is a serious problem, the allocation is quite wrong if it allows increment to diverge so far from the input increment
// this is a serious problem, the allocation is quite
// wrong if it allows increment to diverge so far from the
// input increment
std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl;
increment = 0;
allotment = increment - m_increment;

View File

@@ -81,7 +81,7 @@ protected:
double m_divergence;
float m_recovery;
float m_prevRatio;
bool m_wasTransient;
int m_transientAmnesty; // only in RT mode; handled differently offline
int m_debugLevel;
bool m_useHardPeaks;

View File

@@ -39,7 +39,7 @@ RubberBandStretcher::Impl::ChannelData::construct(const std::set<size_t> &window
size_t realSize = maxSize/2 + 1; // size of the real "half" of freq data
std::cerr << "ChannelData::construct([" << windowSizes.size() << "], " << maxSize << ", " << outbufSize << ")" << std::endl;
// std::cerr << "ChannelData::construct([" << windowSizes.size() << "], " << maxSize << ", " << outbufSize << ")" << std::endl;
if (outbufSize < maxSize) outbufSize = maxSize;
@@ -97,7 +97,7 @@ RubberBandStretcher::Impl::ChannelData::setWindowSize(size_t windowSize)
size_t oldSize = inbuf->getSize();
size_t realSize = windowSize/2 + 1;
std::cerr << "ChannelData::setWindowSize(" << windowSize << ") [from " << oldSize << "]" << std::endl;
// std::cerr << "ChannelData::setWindowSize(" << windowSize << ") [from " << oldSize << "]" << std::endl;
if (oldSize >= windowSize) {
@@ -203,7 +203,7 @@ RubberBandStretcher::Impl::ChannelData::setOutbufSize(size_t outbufSize)
{
size_t oldSize = outbuf->getSize();
std::cerr << "ChannelData::setOutbufSize(" << outbufSize << ") [from " << oldSize << "]" << std::endl;
// std::cerr << "ChannelData::setOutbufSize(" << outbufSize << ") [from " << oldSize << "]" << std::endl;
if (oldSize < outbufSize) {

View File

@@ -35,9 +35,16 @@ using std::min;
namespace RubberBand {
static const size_t defaultIncrement = 256;
static const size_t defaultWindowSize = 2048;
// Definition of the static default increment (256).
const size_t
RubberBandStretcher::Impl::m_defaultIncrement = 256;
// Definition of the static default window size (2048).
const size_t
RubberBandStretcher::Impl::m_defaultWindowSize = 2048;
// Definition of the static default debug level; starts at 0.
int
RubberBandStretcher::Impl::m_defaultDebugLevel = 0;
RubberBandStretcher::Impl::Impl(RubberBandStretcher *stretcher,
size_t sampleRate,
size_t channels,
@@ -48,15 +55,15 @@ RubberBandStretcher::Impl::Impl(RubberBandStretcher *stretcher,
m_channels(channels),
m_timeRatio(initialTimeRatio),
m_pitchScale(initialPitchScale),
m_windowSize(defaultWindowSize),
m_increment(defaultIncrement),
m_outbufSize(defaultWindowSize * 2),
m_maxProcessSize(defaultWindowSize),
m_windowSize(m_defaultWindowSize),
m_increment(m_defaultIncrement),
m_outbufSize(m_defaultWindowSize * 2),
m_maxProcessSize(m_defaultWindowSize),
m_expectedInputDuration(0),
m_threaded(false),
m_realtime(false),
m_options(options),
m_debugLevel(1),
m_debugLevel(m_defaultDebugLevel),
m_mode(JustCreated),
m_window(0),
m_studyFFT(0),
@@ -69,19 +76,31 @@ RubberBandStretcher::Impl::Impl(RubberBandStretcher *stretcher,
m_freq0(600),
m_freq1(1200),
m_freq2(12000),
m_baseWindowSize(defaultWindowSize)
m_baseWindowSize(m_defaultWindowSize)
{
cerr << "RubberBandStretcher::Impl::Impl: options = " << options << endl;
if (m_debugLevel > 0) {
cerr << "RubberBandStretcher::Impl::Impl: rate = " << m_stretcher->m_sampleRate << ", options = " << options << endl;
}
// Window size will vary according to the audio sample rate, but
// we don't let it drop below the 48k default
m_rateMultiple = float(m_stretcher->m_sampleRate) / 48000.f;
if (m_rateMultiple < 1.f) m_rateMultiple = 1.f;
m_baseWindowSize = roundUp(int(m_defaultWindowSize * m_rateMultiple));
if ((options & OptionWindowShort) || (options & OptionWindowLong)) {
if ((options & OptionWindowShort) && (options & OptionWindowLong)) {
cerr << "RubberBandStretcher::Impl::Impl: Cannot specify OptionWindowLong and OptionWindowShort together; falling back to OptionWindowStandard" << endl;
} else if (options & OptionWindowShort) {
m_baseWindowSize = defaultWindowSize / 2;
cerr << "setting baseWindowSize to " << m_baseWindowSize << endl;
m_baseWindowSize = m_baseWindowSize / 2;
if (m_debugLevel > 0) {
cerr << "setting baseWindowSize to " << m_baseWindowSize << endl;
}
} else if (options & OptionWindowLong) {
m_baseWindowSize = defaultWindowSize * 2;
cerr << "setting baseWindowSize to " << m_baseWindowSize << endl;
m_baseWindowSize = m_baseWindowSize * 2;
if (m_debugLevel > 0) {
cerr << "setting baseWindowSize to " << m_baseWindowSize << endl;
}
}
m_windowSize = m_baseWindowSize;
m_outbufSize = m_baseWindowSize * 2;
@@ -278,7 +297,7 @@ RubberBandStretcher::Impl::roundUp(size_t value)
void
RubberBandStretcher::Impl::calculateSizes()
{
size_t inputIncrement = defaultIncrement;
size_t inputIncrement = m_defaultIncrement;
size_t windowSize = m_baseWindowSize;
size_t outputIncrement;
@@ -288,7 +307,7 @@ RubberBandStretcher::Impl::calculateSizes()
// use a fixed input increment
inputIncrement = defaultIncrement;
inputIncrement = roundUp(int(m_defaultIncrement * m_rateMultiple));
if (r < 1) {
outputIncrement = int(floor(inputIncrement * r));
@@ -340,7 +359,7 @@ RubberBandStretcher::Impl::calculateSizes()
}
// windowSize can be almost anything, but it can't be greater than
// 4 * defaultWindowSize unless ratio is less than 1/1024.
// 4 * m_baseWindowSize unless ratio is less than 1/1024.
m_windowSize = windowSize;
m_increment = inputIncrement;
@@ -357,15 +376,6 @@ RubberBandStretcher::Impl::calculateSizes()
cerr << "configure: window size = " << m_windowSize << ", increment = " << m_increment << " (approx output increment = " << int(lrint(m_increment * getEffectiveRatio())) << ")" << endl;
}
static size_t maxWindowSize = 0;
if (m_windowSize > maxWindowSize) {
//!!!
cerr << "configure: NOTE: max window size so far increased from "
<< maxWindowSize << " to " << m_windowSize << endl;
maxWindowSize = m_windowSize;
}
if (m_windowSize > m_maxProcessSize) {
m_maxProcessSize = m_windowSize;
}
@@ -606,22 +616,19 @@ RubberBandStretcher::Impl::getLatency() const
// Update the transient-handling selection stored in m_options from `options`.
//
// NOTE(review): this span comes from a diff hunk, so it appears to show the
// superseded Smooth-only toggle immediately followed by its replacement
// (clear all three transients flags, then OR in `options`). Read literally,
// the if/else is redundant: the statements after it yield the same final
// m_options whichever branch ran.
void
RubberBandStretcher::Impl::setTransientsOption(Options options)
{
//!!!
// Toggle only the Smooth flag, on or off, to mirror `options`.
if (options & OptionTransientsSmooth) {
m_options |= OptionTransientsSmooth;
} else {
m_options &= ~OptionTransientsSmooth;
}
// Drop whichever transients flag was previously selected ...
m_options &= ~(OptionTransientsMixed |
OptionTransientsSmooth |
OptionTransientsCrisp);
// ... then OR in the caller's value. NOTE(review): every bit of `options`
// is ORed in, not only the transients bits -- presumably callers pass a
// single OptionTransients* constant; confirm against callers.
m_options |= options;
}
// Update the phase-handling selection stored in m_options from `options`.
//
// NOTE(review): this span comes from a diff hunk, so it appears to show the
// superseded Independent-only toggle immediately followed by its replacement
// (clear all three phase flags, then OR in `options`). Read literally, the
// if/else is redundant: the statements after it yield the same final
// m_options whichever branch ran.
void
RubberBandStretcher::Impl::setPhaseOption(Options options)
{
// Toggle only the Independent flag, on or off, to mirror `options`.
if (options & OptionPhaseIndependent) {
m_options |= OptionPhaseIndependent;
} else {
m_options &= ~OptionPhaseIndependent;
}
// Drop whichever phase flag was previously selected ...
m_options &= ~(OptionPhaseAdaptive |
OptionPhasePeakLocked |
OptionPhaseIndependent);
// ... then OR in the caller's value. NOTE(review): every bit of `options`
// is ORed in, not only the phase bits -- presumably callers pass a single
// OptionPhase* constant; confirm against callers.
m_options |= options;
}
void

View File

@@ -80,6 +80,7 @@ public:
void calculateStretch();
void setDebugLevel(int level);
static void setDefaultDebugLevel(int level) { m_defaultDebugLevel = level; }
protected:
RubberBandStretcher *m_stretcher;
@@ -174,10 +175,14 @@ protected:
float m_freq2;
size_t m_baseWindowSize;
float m_rateMultiple;
void writeOutput(RingBuffer<float> &to, float *from,
size_t qty, size_t &outCount, size_t theoreticalOut);
static int m_defaultDebugLevel;
static const size_t m_defaultIncrement;
static const size_t m_defaultWindowSize;
};
}

View File

@@ -482,25 +482,32 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
cd.freqPeak[0] = 0;
float freq0 = m_freq0;
float freq1 = m_freq1;
float freq2 = m_freq2;
// As the stretch ratio increases, so the frequency thresholds
// for phase lamination should increase. Beyond a ratio of
// about 1.5, the threshold should be about 1200Hz; beyond a
// ratio of 2, we probably want no lamination to happen at all
// by default. This calculation aims for that.
// by default. This calculation aims for more or less that.
// We only do this if the phase option is OptionPhaseAdaptive
// (the default), i.e. not Independent or PeakLocked.
//!!! we should only do this if asked to -- and when not
//setting f0,f1,f2 explicitly
float r = getEffectiveRatio();
if (r > 1) {
float rf0 = 600 + (600 * ((r-1)*(r-1)*2));
// std::cerr << "ratio = " << r << ", rf0 = " << rf0 << std::endl;
freq0 = std::max(freq0, rf0);
if (!(m_options & OptionPhasePeakLocked)) {
float r = getEffectiveRatio();
if (r > 1) {
float rf0 = 600 + (600 * ((r-1)*(r-1)*(r-1)*2));
float f1ratio = freq1 / freq0;
float f2ratio = freq2 / freq0;
freq0 = std::max(freq0, rf0);
freq1 = freq0 * f1ratio;
freq2 = freq0 * f2ratio;
}
}
size_t limit0 = lrint((freq0 * m_windowSize) / rate);
size_t limit1 = lrint((m_freq1 * m_windowSize) / rate);
size_t limit2 = lrint((m_freq2 * m_windowSize) / rate);
size_t limit1 = lrint((freq1 * m_windowSize) / rate);
size_t limit2 = lrint((freq2 * m_windowSize) / rate);
size_t range = 0;
@@ -514,11 +521,6 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
for (size_t i = 0; i <= count; ++i) {
double mag = cd.mag[i];
//!!! N.B. if the stretch ratio is very long, it's generally
//better not to attempt this phase lamination -- stick with
//range==0 throughout.
bool isPeak = true;
for (size_t j = 1; j <= range; ++j) {
@@ -587,9 +589,7 @@ RubberBandStretcher::Impl::modifyChunk(size_t channel, size_t outputIncrement,
bool resetThis = phaseReset;
if (!(m_options & OptionTransientsSmooth) &&
!(m_options & OptionTransientsCrisp)) {
// must be OptionTransientsMixed
if (m_options & OptionTransientsMixed) {
size_t low = lrint((150 * m_windowSize) / rate);
size_t high = lrint((1000 * m_windowSize) / rate);
if (resetThis) {
@@ -827,10 +827,11 @@ RubberBandStretcher::Impl::available() const
if (!m_threaded) {
for (size_t c = 0; c < m_channels; ++c) {
if (m_channelData[c]->inputSize >= 0) {
cerr << "available: m_done true" << endl;
// cerr << "available: m_done true" << endl;
if (m_channelData[c]->inbuf->getReadSpace() > 0) {
cerr << "calling processChunks(" << c << ") from available" << endl;
// cerr << "calling processChunks(" << c << ") from available" << endl;
//!!! do we ever actually do this? if so, this method should not be const
// ^^^ yes, we do sometimes -- e.g. when fed a very short file
((RubberBandStretcher::Impl *)this)->processChunks(c);
}
}

View File

@@ -22,6 +22,9 @@
#include <getopt.h>
// for import and export of FFTW wisdom
#include <fftw3.h>
using namespace std;
using namespace RubberBand;
@@ -32,15 +35,19 @@ int main(int argc, char **argv)
double ratio = 1.0;
double pitchshift = 1.0;
double frequencyshift = 1.0;
int debug = 1;
int debug = 0;
bool realtime = false;
bool precise = false;
bool threaded = true;
bool peaklock = true;
bool longwin = false;
bool shortwin = false;
bool softening = true;
int crispness = -1;
bool help = false;
bool quiet = false;
bool haveRatio = false;
enum {
NoTransients,
@@ -53,7 +60,6 @@ int main(int argc, char **argv)
float fthresh2 = -1.f;
while (1) {
int thisOptind = optind ? optind : 1;
int optionIndex = 0;
static struct option longOpts[] = {
@@ -76,18 +82,20 @@ int main(int argc, char **argv)
{ "thresh1", 1, 0, '6' },
{ "thresh2", 1, 0, '7' },
{ "bl-transients", 0, 0, '8' },
{ "no-softening", 0, 0, '9' },
{ "quiet", 0, 0, 'q' },
{ 0, 0, 0 }
};
c = getopt_long(argc, argv, "t:p:d:RPc:f:", longOpts, &optionIndex);
c = getopt_long(argc, argv, "t:p:d:RPc:f:qh", longOpts, &optionIndex);
if (c == -1) break;
switch (c) {
case 'h': help = true; break;
case 't': ratio *= atof(optarg); break;
case 'T': { double m = atof(optarg); if (m != 0.0) ratio /= m; } break;
case 'p': pitchshift = atof(optarg); break;
case 'f': frequencyshift = atof(optarg); break;
case 't': ratio *= atof(optarg); haveRatio = true; break;
case 'T': { double m = atof(optarg); if (m != 0.0) ratio /= m; }; haveRatio = true; break;
case 'p': pitchshift = atof(optarg); haveRatio = true; break;
case 'f': frequencyshift = atof(optarg); haveRatio = true; break;
case 'd': debug = atoi(optarg); break;
case 'R': realtime = true; break;
case 'P': precise = true; break;
@@ -100,20 +108,22 @@ int main(int argc, char **argv)
case '6': fthresh1 = atof(optarg); break;
case '7': fthresh2 = atof(optarg); break;
case '8': transients = BandLimitedTransients; break;
case '9': softening = false; break;
case 'c': crispness = atoi(optarg); break;
default: break;
case 'q': quiet = true; break;
default: help = true; break;
}
}
if (help || optind + 2 != argc) {
if (help || !haveRatio || optind + 2 != argc) {
cerr << endl;
cerr << "Rubber Band" << endl;
cerr << "An audio time-stretching and pitch-shifting library and utility program." << endl;
cerr << "Copyright 2007 Chris Cannam. Distributed under the GNU General Public License." << endl;
cerr << endl;
cerr << "Usage: " << argv[0] << " [options] <infile.wav> <outfile.wav>" << endl;
cerr << " Usage: " << argv[0] << " [options] <infile.wav> <outfile.wav>" << endl;
cerr << endl;
cerr << "where options may be:" << endl;
cerr << "You must specify at least one of the following time and pitch ratio options." << endl;
cerr << endl;
cerr << " -t<X>, --time <X> Stretch to X times original duration, or" << endl;
cerr << " -T<X>, --tempo <X> Change tempo by multiple X (equivalent to --time 1/X)" << endl;
@@ -121,9 +131,12 @@ int main(int argc, char **argv)
cerr << " -p<X>, --pitch <X> Raise pitch by X semitones, or" << endl;
cerr << " -f<X>, --frequency <X> Change frequency by multiple X" << endl;
cerr << endl;
cerr << " -c<N>, --crisp <N> Crispness (N = 0,1,2,3); default 2 (see below)" << endl;
cerr << "The following option provides a simple way to adjust the sound. See below" << endl;
cerr << "for more details." << endl;
cerr << endl;
cerr << "The following options adjust the processing mode and stretch algorithm." << endl;
cerr << " -c<N>, --crisp <N> Crispness (N = 0,1,2,3,4,5); default 4 (see below)" << endl;
cerr << endl;
cerr << "The remaining options fine-tune the processing mode and stretch algorithm." << endl;
cerr << "These are mostly included for test purposes; the default settings and standard" << endl;
cerr << "crispness parameter are intended to provide the best sounding set of options" << endl;
cerr << "for most situations." << endl;
@@ -132,31 +145,38 @@ int main(int argc, char **argv)
cerr << " -R, --realtime Select realtime mode (implies -P --no-threads)" << endl;
cerr << " --no-threads No extra threads regardless of cpus/channel count" << endl;
cerr << " --no-transients Disable phase resynchronisation at transients" << endl;
cerr << " --bl-transients Band-limit phase resync to extreme frequencies" << endl;
cerr << " --no-peaklock Disable phase locking to peak frequencies" << endl;
cerr << " --no-softening Disable large-ratio softening of phase locking" << endl;
cerr << " --window-long Use longer processing window (actual size may vary)" << endl;
cerr << " --window-short Use shorter processing window" << endl;
cerr << " --thresh<N> <F> Set internal freq threshold N (N = 0,1,2) to F Hz" << endl;
cerr << endl;
cerr << " -d<N>, --debug <N> Select debug level (N = 0,1,2,3); default 1, full 3" << std::endl;
cerr << " -d<N>, --debug <N> Select debug level (N = 0,1,2,3); default 0, full 3" << endl;
cerr << " (N.B. debug level 3 includes audible ticks in output)" << endl;
cerr << " -q, --quiet Suppress progress output" << endl;
cerr << endl;
cerr << " -h, --help Show this help" << endl;
cerr << endl;
cerr << "\"Crispness\" levels:" << endl;
cerr << " -c 0 equivalent to --no-transients --no-peaklock" << endl;
cerr << " -c 1 equivalent to --no-peaklock" << endl;
cerr << " -c 2 default processing options" << endl;
cerr << " -c 3 equivalent to --no-peaklock --window-short (may be suitable for drums)" << endl;
cerr << " -c 0 equivalent to --no-transients --no-peaklock --window-long" << endl;
cerr << " -c 1 equivalent to --no-transients --no-peaklock" << endl;
cerr << " -c 2 equivalent to --no-transients" << endl;
cerr << " -c 3 equivalent to --bl-transients" << endl;
cerr << " -c 4 default processing options" << endl;
cerr << " -c 5 equivalent to --no-peaklock --window-short (may be suitable for drums)" << endl;
cerr << endl;
return 2;
}
switch (crispness) {
case -1: crispness = 2; break;
case 0: transients = NoTransients; peaklock = false; longwin = false; shortwin = false; break;
case 1: transients = Transients; peaklock = false; longwin = false; shortwin = false; break;
case 2: transients = Transients; peaklock = true; longwin = false; shortwin = false; break;
case 3: transients = Transients; peaklock = false; longwin = false; shortwin = true; break;
case -1: crispness = 4; break;
case 0: transients = NoTransients; peaklock = false; longwin = true; shortwin = false; break;
case 1: transients = NoTransients; peaklock = false; longwin = false; shortwin = false; break;
case 2: transients = NoTransients; peaklock = true; longwin = false; shortwin = false; break;
case 3: transients = BandLimitedTransients; peaklock = true; longwin = false; shortwin = false; break;
case 4: transients = Transients; peaklock = true; longwin = false; shortwin = false; break;
case 5: transients = Transients; peaklock = false; longwin = false; shortwin = true; break;
};
char *fileName = strdup(argv[optind++]);
@@ -195,8 +215,8 @@ int main(int argc, char **argv)
RubberBandStretcher::Options options = 0;
if (realtime) options |= RubberBandStretcher::OptionProcessRealTime;
if (precise) options |= RubberBandStretcher::OptionStretchPrecise;
// if (!transients) options |= RubberBandStretcher::OptionTransientsSmooth;
if (!peaklock) options |= RubberBandStretcher::OptionPhaseIndependent;
if (!softening) options |= RubberBandStretcher::OptionPhasePeakLocked;
if (!threaded) options |= RubberBandStretcher::OptionThreadingNone;
if (longwin) options |= RubberBandStretcher::OptionWindowLong;
if (shortwin) options |= RubberBandStretcher::OptionWindowShort;
@@ -217,16 +237,13 @@ int main(int argc, char **argv)
frequencyshift *= pow(2.0, pitchshift / 12);
}
RubberBandStretcher::setDefaultDebugLevel(debug);
RubberBandStretcher ts(sfinfo.samplerate, channels, options,
ratio, frequencyshift);
ts.setDebugLevel(debug);
ts.setExpectedInputDuration(sfinfo.frames);
// ts.setTimeRatio(ratio);
// ts.setPitchScale(pitchshift);
float *fbuf = new float[channels * ibs];
float **ibuf = new float *[channels];
for (size_t i = 0; i < channels; ++i) ibuf[i] = new float[ibs];
@@ -239,12 +256,12 @@ int main(int argc, char **argv)
if (!realtime) {
cerr << "First pass (studying)..." << endl;
if (!quiet) {
cerr << "Pass 1: Studying..." << endl;
}
while (frame < sfinfo.frames) {
// std::cout << "study frame " << frame << std::endl;
int count = -1;
if (sf_seek(sndfile, frame, SEEK_SET) < 0) break;
@@ -264,15 +281,17 @@ int main(int argc, char **argv)
int p = int((double(frame) * 100.0) / sfinfo.frames);
if (p > percent || frame == 0) {
percent = p;
cerr << "\r" << percent << "% ";
if (!quiet) {
cerr << "\r" << percent << "% ";
}
}
frame += ibs;
}
cerr << endl;
cerr << "Second pass (processing)..." << endl;
if (!quiet) {
cerr << "\rCalculating profile..." << endl;
}
}
frame = 0;
@@ -306,13 +325,8 @@ int main(int argc, char **argv)
ts.process(ibuf, count, final);
// if
// std::cerr << frame << " + " << ibs << " >= " << sfinfo.frames << ": calling ts.complete()!" << std::endl;
// ts.complete();
// }
int avail = ts.available();
if (debug > 1) std::cerr << "available = " << avail << std::endl;
if (debug > 1) cerr << "available = " << avail << endl;
if (avail > 0) {
float **obf = new float *[channels];
@@ -323,7 +337,7 @@ int main(int argc, char **argv)
countOut += avail;
float *fobf = new float[channels * avail];
for (size_t c = 0; c < channels; ++c) {
for (size_t i = 0; i < avail; ++i) {
for (int i = 0; i < avail; ++i) {
float value = obf[c][i];
if (fabsf(value) > outpeak) outpeak = fabsf(value);
outsum += value * value;
@@ -333,13 +347,13 @@ int main(int argc, char **argv)
fobf[i * channels + c] = value;
}
}
// std::cout << "fobf mean: ";
// cout << "fobf mean: ";
// double d = 0;
// for (int i = 0; i < avail; ++i) {
// d += fobf[i];
// }
// d /= avail;
// std::cout << d << std::endl;
// cout << d << endl;
sf_writef_float(sndfileOut, fobf, avail);
delete[] fobf;
for (size_t i = 0; i < channels; ++i) {
@@ -348,20 +362,31 @@ int main(int argc, char **argv)
delete[] obf;
}
if (frame == 0 && !realtime && !quiet) {
cerr << "Pass 2: Processing..." << endl;
}
int p = int((double(frame) * 100.0) / sfinfo.frames);
if (p > percent || frame == 0) {
percent = p;
cerr << "\r" << percent << "% ";
if (!quiet) {
cerr << "\r" << percent << "% ";
}
}
frame += ibs;
}
if (!quiet) {
cerr << "\r " << endl;
}
int avail;
while ((avail = ts.available()) >= 0) {
if (debug > 1) std::cerr << "(completing) available = " << avail << std::endl;
if (debug > 1) {
cerr << "(completing) available = " << avail << endl;
}
if (avail > 0) {
float **obf = new float *[channels];
@@ -372,7 +397,7 @@ int main(int argc, char **argv)
countOut += avail;
float *fobf = new float[channels * avail];
for (size_t c = 0; c < channels; ++c) {
for (size_t i = 0; i < avail; ++i) {
for (int i = 0; i < avail; ++i) {
float value = obf[c][i];
if (fabsf(value) > outpeak) outpeak = fabsf(value);
outsum += value * value;
@@ -389,6 +414,8 @@ int main(int argc, char **argv)
delete[] obf[i];
}
delete[] obf;
} else {
usleep(10000);
}
}
@@ -398,28 +425,26 @@ int main(int argc, char **argv)
double inmean = sqrt(insum / (sfinfo.frames * sfinfo.channels));
double outmean = sqrt(outsum / (countOut * sfinfo.channels));
cerr << endl << "in: " << countIn << ", out: " << countOut << ", ratio: " << float(countOut)/float(countIn) << ", ideal output: " << lrint(countIn * ratio) << ", diff: " << abs(lrint(countIn * ratio) - int(countOut)) << endl;
if (!quiet) {
cerr << "input peak: " << inpeak << "; output peak " << outpeak << "; gain " << (inpeak > 0 ? outpeak/inpeak : 1) << endl;
cerr << "input rms: " << inmean << "; output rms " << outmean << "; gain " << (inmean > 0 ? outmean/inmean : 1) << endl;
cerr << "in: " << countIn << ", out: " << countOut << ", ratio: " << float(countOut)/float(countIn) << ", ideal output: " << lrint(countIn * ratio) << ", error: " << abs(lrint(countIn * ratio) - int(countOut)) << endl;
struct timeval etv;
(void)gettimeofday(&etv, 0);
cerr << "input peak: " << inpeak << "; output peak " << outpeak << "; gain " << (inpeak > 0 ? outpeak/inpeak : 1) << endl;
cerr << "input rms: " << inmean << "; output rms " << outmean << "; gain " << (inmean > 0 ? outmean/inmean : 1) << endl;
cerr << "\nstart: " << tv.tv_sec << ":" << tv.tv_usec << endl;
cerr << "finish: " << etv.tv_sec << ":" << etv.tv_usec << endl;
etv.tv_sec -= tv.tv_sec;
if (etv.tv_usec < tv.tv_usec) {
etv.tv_usec += 1000000;
etv.tv_sec -= 1;
struct timeval etv;
(void)gettimeofday(&etv, 0);
etv.tv_sec -= tv.tv_sec;
if (etv.tv_usec < tv.tv_usec) {
etv.tv_usec += 1000000;
etv.tv_sec -= 1;
}
etv.tv_usec -= tv.tv_usec;
double sec = double(etv.tv_sec) + (double(etv.tv_usec) / 1000000.0);
cerr << "\nelapsed time: " << sec << " sec, in frames/sec: " << countIn/sec << ", out frames/sec: " << countOut/sec << endl;
}
etv.tv_usec -= tv.tv_usec;
cerr << "elapsed: " << etv.tv_sec << ":" << etv.tv_usec << endl;
double sec = double(etv.tv_sec) + (double(etv.tv_usec) / 1000000.0);
cerr << "\nin/sec: " << countIn/sec << ", out/sec: " << countOut/sec << endl;
return 0;
}