* Improvements to offline phase reset point detection

* Better support for higher sample rates
* Save and restore FFTW wisdom (needs work still)
* More tidying, options and argument overhauls
This commit is contained in:
Chris Cannam
2007-11-20 20:17:13 +00:00
parent e9cb6dbc37
commit 7c4fcd85da
11 changed files with 408 additions and 322 deletions

View File

@@ -32,10 +32,10 @@ StretchCalculator::StretchCalculator(size_t sampleRate,
m_divergence(0),
m_recovery(0),
m_prevRatio(1.0),
m_wasTransient(false),
m_transientAmnesty(0),
m_useHardPeaks(useHardPeaks)
{
std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
// std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
}
StretchCalculator::~StretchCalculator()
@@ -47,38 +47,6 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
const std::vector<float> &phaseResetDf,
const std::vector<float> &stretchDf)
{
// Method:
//!!! This description is out of date.
// 1. Pre-process the df array, and for each (say) one second's
// worth of values, calculate the number of peaks that would
// qualify for phase reset given the default threshold. Then
// reduce or increase the threshold by stages until that number is
// in a sensible range (say 1-10 peaks per second -- the low end
// is harder to estimate than the high end, so it may be better to
// start with a high sensitivity and reduce it).
// 2. Record the positions of peaks, and separately the positions
// of those peaks that qualify for reset using the sliding
// threshold window. Don't permit two locked peaks within a very
// short time frame (e.g. 30-50ms).
// 3. Map each of the locked peaks (or any peaks that are over a
// given intensity?), as well as the start and end points, to a
// proportionate position in the newly stretched array so as to
// ensure that their timing is strictly "correct".
// 4. Calculate how much time is left in the stretch total, after
// each of the locked chunks has been allocated its static
// allowance. Also count the non-locked chunks.
// 5. For each region between two locked chunks, calculate the
// number of samples to allocate that region given the time
// available for stretch and the number of non-locked chunks.
// Then divvy them up... how exactly?
assert(phaseResetDf.size() == stretchDf.size());
m_lastPeaks = findPeaks(phaseResetDf);
@@ -89,13 +57,10 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
size_t outputDuration = lrint(inputDuration * ratio);
std::cerr << "debug level: " << m_debugLevel << std::endl;
if (m_debugLevel > 0) {
std::cerr << "StretchCalculator::calculate(): inputDuration " << inputDuration << ", ratio " << ratio << ", outputDuration " << outputDuration;
}
//!!! round down?
outputDuration = lrint((phaseResetDf.size() * m_increment) * ratio);
if (m_debugLevel > 0) {
@@ -103,41 +68,32 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
std::cerr << ", df size " << phaseResetDf.size() << std::endl;
}
// size_t stretchable = outputDuration - lockCount * m_increment;
std::vector<size_t> fixedAudioChunks;
for (size_t i = 0; i < peaks.size(); ++i) {
fixedAudioChunks.push_back
//!!! this should be rounding down, shouldn't it? not lrint?
(lrint((double(peaks[i].chunk) * outputDuration) / totalCount));
}
// size_t lockIndex = 0;
if (m_debugLevel > 1) {
std::cerr << "have " << peaks.size() << " fixed positions" << std::endl;
}
size_t totalInput = 0, totalOutput = 0;
// so for each inter-lock region, we want to take the number of
// output chunks to be allocated and the detection function values
// within the range, and produce a series of increments that sum
// to the number of output chunks, such that each increment is
// displaced from the input increment by an amount inversely
// proportional to the magnitude of the detection function at that
// input step. Ideally the detection function would have been
// somewhat smoothed for this purpose but we'll start raw.
//!!! Actually, we would possibly be better off using a fixed
// smooth curve than the detection function itself.
// For each region between two consecutive time sync points, we
// want to take the number of output chunks to be allocated and
// the detection function values within the range, and produce a
// series of increments that sum to the number of output chunks,
// such that each increment is displaced from the input increment
// by an amount inversely proportional to the magnitude of the
// stretch detection function at that input step.
size_t regionTotalChunks = 0;
for (size_t i = 0; i <= peaks.size(); ++i) {
size_t regionStart, regionStartChunk, regionEnd, regionEndChunk;
bool phaseLock = false;
bool phaseReset = false;
if (i == 0) {
regionStartChunk = 0;
@@ -145,7 +101,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
} else {
regionStartChunk = peaks[i-1].chunk;
regionStart = fixedAudioChunks[i-1];
phaseLock = peaks[i-1].hard;
phaseReset = peaks[i-1].hard;
}
if (i == peaks.size()) {
@@ -172,7 +128,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
dfRegion = smoothDF(dfRegion);
std::vector<int> regionIncrements = distributeRegion
(dfRegion, regionDuration, ratio, phaseLock);
(dfRegion, regionDuration, ratio, phaseReset);
size_t totalForRegion = 0;
@@ -180,7 +136,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
int incr = regionIncrements[j];
if (j == 0 && phaseLock) increments.push_back(-incr);
if (j == 0 && phaseReset) increments.push_back(-incr);
else increments.push_back(incr);
if (incr > 0) totalForRegion += incr;
@@ -200,6 +156,7 @@ StretchCalculator::calculate(double ratio, size_t inputDuration,
std::cerr << "total input increment = " << totalInput << " (= " << totalInput / m_increment << " chunks), output = " << totalOutput << ", ratio = " << double(totalOutput)/double(totalInput) << ", ideal output " << ceil(totalInput * ratio) << std::endl;
std::cerr << "(region total = " << regionTotalChunks << ")" << std::endl;
}
return increments;
}
@@ -210,13 +167,15 @@ StretchCalculator::calculateSingle(double ratio,
{
bool isTransient = false;
//!!! We want to ensure, as close as possible, that the lock
// points appear at _exactly_ the right audio frame numbers
// We want to ensure, as close as possible, that the phase reset
// points appear at _exactly_ the right audio frame numbers.
// In principle, the threshold depends on chunk size: larger chunk
// sizes need higher thresholds. Since chunk size depends on
// ratio, I suppose we could in theory calculate the threshold
// from the ratio directly. For the moment we're happy if it
// works well in common situations.
//!!! depends on chunk size. larger chunk sizes need higher
//thresholds. since chunk size depends on ratio, I suppose we
//could in theory calculate the threshold from the ratio directly.
//For now we just frig it to work OK for a couple of common cases
float transientThreshold = 0.35;
if (ratio > 1) transientThreshold = 0.25;
@@ -231,13 +190,17 @@ StretchCalculator::calculateSingle(double ratio,
m_prevDf = df;
if (isTransient && !m_wasTransient) {
if (isTransient && m_transientAmnesty == 0) {
if (m_debugLevel > 1) {
std::cerr << "StretchCalculator::calculateSingle: transient found at "
<< inputDurationSoFar << std::endl;
}
m_divergence += m_increment - (m_increment * ratio);
m_wasTransient = true;
// as in offline mode, 0.05 sec approx min between transients
m_transientAmnesty =
lrint(ceil(double(m_sampleRate) / (20 * double(m_increment))));
m_recovery = m_divergence / ((m_sampleRate / 10.0) / m_increment);
return -m_increment;
}
@@ -247,17 +210,16 @@ StretchCalculator::calculateSingle(double ratio,
m_prevRatio = ratio;
}
//!!! want transient amnesty as above (hard peak amnesty)
m_wasTransient = false;
if (m_transientAmnesty > 0) --m_transientAmnesty;
int incr = lrint(m_increment * ratio - m_recovery);
if (m_debugLevel > 2 || (m_debugLevel > 1 && m_divergence != 0)) {
std::cerr << "divergence = " << m_divergence << ", recovery = " << m_recovery << ", incr = " << incr << ", ";
}
if (incr < (m_increment * ratio) / 2) {
incr = (m_increment * ratio) / 2;
} else if (incr > m_increment * ratio * 2) {
incr = m_increment * ratio * 2;
if (incr < lrint((m_increment * ratio) / 2)) {
incr = lrint((m_increment * ratio) / 2);
} else if (incr > lrint(m_increment * ratio * 2)) {
incr = lrint(m_increment * ratio * 2);
}
double divdiff = (m_increment * ratio) - incr;
@@ -288,58 +250,112 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
{
std::vector<float> df = smoothDF(rawDf);
// We distinguish between "soft" and "hard" peaks. A soft peak is
// simply the result of peak-picking on the smoothed onset
// detection function, and it represents any (strong-ish) onset.
// We aim to ensure always that soft peaks are placed at the
// correct position in time. A hard peak is where there is a very
// rapid rise in detection function, and it presumably represents
// a more broadband, noisy transient. For these we perform a
// phase reset (if in the appropriate mode), and we locate the
// reset at the first point where we notice enough of a rapid
// rise, rather than necessarily at the peak itself, in order to
// preserve the shape of the transient.
std::set<size_t> hardPeakCandidates;
std::set<size_t> softPeakCandidates;
if (m_useHardPeaks) {
//!!! this should depend on duration based on output increment surely?
// 0.05 sec approx min between hard peaks
size_t hardPeakAmnesty = lrint(ceil(double(m_sampleRate) /
(20 * double(m_increment)))); // 0.05 sec ish
// size_t hardPeakAmnesty = 5;
(20 * double(m_increment))));
size_t prevHardPeak = 0;
std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
if (m_debugLevel > 1) {
std::cerr << "hardPeakAmnesty = " << hardPeakAmnesty << std::endl;
}
for (size_t i = 1; i + 1 < df.size(); ++i) {
//!!! this ratio configurable? dependent on chunk size and sr?
if (df[i] < 0.1) continue;
if (df[i] <= df[i-1] * 1.2) continue;
if (df[i] <= df[i-1] * 1.1) continue;
if (df[i] < 0.22) continue;
if (df[i] > df[i-1] * 1.4 ||
(df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
df[i] > 0.4) {
if (!hardPeakCandidates.empty() &&
i < prevHardPeak + hardPeakAmnesty) {
continue;
}
size_t peakLocation = i;
if (i + 1 < rawDf.size() &&
rawDf[i + 1] > rawDf[i] * 1.4) {
++peakLocation;
}
if (m_debugLevel > 1) {
std::cerr << "hard peak at " << peakLocation << " (" << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << ")" << std::endl;
}
hardPeakCandidates.insert(peakLocation);
prevHardPeak = peakLocation;
if (!hardPeakCandidates.empty() &&
i < prevHardPeak + hardPeakAmnesty) {
continue;
}
bool hard = (df[i] > 0.4);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > absolute " << 0.4
<< std::endl;
}
if (!hard) {
hard = (df[i] > df[i-1] * 1.4);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.4"
<< std::endl;
}
}
if (!hard && i > 1) {
hard = (df[i] > df[i-1] * 1.2 &&
df[i-1] > df[i-2] * 1.2);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.2 and "
<< df[i-1] << " > prev " << df[i-2] << " * 1.2"
<< std::endl;
}
}
if (!hard && i > 2) {
// have already established that df[i] > df[i-1] * 1.1
hard = (df[i] > 0.3 &&
df[i-1] > df[i-2] * 1.1 &&
df[i-2] > df[i-3] * 1.1);
if (hard && (m_debugLevel > 1)) {
std::cerr << "hard peak at " << i << ": " << df[i]
<< " > prev " << df[i-1] << " * 1.1 and "
<< df[i-1] << " > prev " << df[i-2] << " * 1.1 and "
<< df[i-2] << " > prev " << df[i-3] << " * 1.1"
<< std::endl;
}
}
if (!hard) continue;
// (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
// df[i] > 0.4) {
size_t peakLocation = i;
if (i + 1 < rawDf.size() &&
rawDf[i + 1] > rawDf[i] * 1.4) {
++peakLocation;
if (m_debugLevel > 1) {
std::cerr << "pushing hard peak forward to " << peakLocation << ": " << df[peakLocation] << " > " << df[peakLocation-1] << " * " << 1.4 << std::endl;
}
}
hardPeakCandidates.insert(peakLocation);
prevHardPeak = peakLocation;
}
}
//!!! we don't yet do the right thing with soft peaks. if
//!useHardPeaks, we should be resetting on soft peaks; if
//useHardPeaks, we should be ignoring soft peaks if they occur
//shortly after hard ones, otherwise either resetting on them, or
//at least making sure they fall at the correct sample time
// int mediansize = lrint(ceil(double(m_sampleRate) /
// (4 * double(m_increment)))); // 0.25 sec ish
size_t medianmaxsize = lrint(ceil(double(m_sampleRate) /
double(m_increment))); // 1 sec ish
// int mediansize = lrint(ceil(double(m_sampleRate) /
// (2 * double(m_increment)))); // 0.5 sec ish
if (m_debugLevel > 1) {
std::cerr << "mediansize = " << medianmaxsize << std::endl;
@@ -382,16 +398,13 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
if (mediansize < 2) {
if (mediansize > medianmaxsize) { // absurd, but never mind that
// std::cerr << "(<2) pop front ";
medianwin.pop_front();
}
if (nextDf < df.size()) {
// std::cerr << "(<2) push back " << df[nextDf] << " ";
medianwin.push_back(df[nextDf]);
} else {
medianwin.push_back(0);
}
// std::cerr << "(<2) continue" << std::endl;
continue;
}
@@ -411,16 +424,16 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
if (index == sorted.size()-1 && index > 0) --index;
float thresh = sorted[index];
if (m_debugLevel > 2) {
// if (m_debugLevel > 2) {
// std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl;
if (medianwin[middle] == 0.f) {
// if (medianwin[middle] == 0.f) {
// std::cerr << "contents: ";
for (size_t j = 0; j < medianwin.size(); ++j) {
// for (size_t j = 0; j < medianwin.size(); ++j) {
// std::cerr << medianwin[j] << " ";
}
// }
// std::cerr << std::endl;
}
}
// }
// }
if (medianwin[middle] > thresh &&
medianwin[middle] > medianwin[middle-1] &&
@@ -439,31 +452,21 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
}
}
//!!! we should distinguish between soft peaks (any found
//using the above method) and hard peaks, which also show
//a very rapid rise in detection function prior to the
//peak (the first value after the rise is not necessarily
//the peak itself, but it is probably where we should
//locate the phase reset). For hard peaks we need to
//reset in time to preserve the shape of the transient
//(unless some option is set to soft mode), for soft peaks
//we just want to avoid poor timing positioning so we
//build up to the reset at the exact peak moment.
// size_t peak = i + maxindex - mediansize;
size_t peak = i + maxindex - middle;
// std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl;
// if (peak > 0) --peak; //!!! that's a fudge
if (softPeakCandidates.empty() || lastSoftPeak != peak) {
if (m_debugLevel > 1) {
std::cerr << "soft peak at " << peak << " (" << peak * m_increment << "): "
<< medianwin[middle] << " > " << thresh << " and "
<< medianwin[middle] << " > " << medianwin[middle-1] << " and "
<< medianwin[middle] << " > " << medianwin[middle+1]
std::cerr << "soft peak at " << peak << " ("
<< peak * m_increment << "): "
<< medianwin[middle] << " > "
<< thresh << " and "
<< medianwin[middle]
<< " > " << medianwin[middle-1] << " and "
<< medianwin[middle]
<< " > " << medianwin[middle+1]
<< std::endl;
}
@@ -484,56 +487,65 @@ StretchCalculator::findPeaks(const std::vector<float> &rawDf)
} else if (softPeakAmnesty > 0) --softPeakAmnesty;
// std::cerr << "i = " << i << " ";
if (mediansize >= medianmaxsize) {
// std::cerr << "(>= " << medianmaxsize << ") pop front ";
medianwin.pop_front();
}
if (nextDf < df.size()) {
// std::cerr << "(" << nextDf << "<" << df.size() << ") push back " << df[nextDf] << " ";
medianwin.push_back(df[nextDf]);
} else {
medianwin.push_back(0);
}
// std::cerr << "continue" << std::endl;
}
std::vector<Peak> peaks;
//!!!
// if (!softPeakCandidates.empty()) {
// std::cerr << "clearing " << softPeakCandidates.size() << " soft peak candidates" << std::endl;
// }
// softPeakCandidates.clear();
while (!hardPeakCandidates.empty() || !softPeakCandidates.empty()) {
bool haveHardPeak = !hardPeakCandidates.empty();
bool haveSoftPeak = !softPeakCandidates.empty();
size_t hardPeak = (haveHardPeak ? *hardPeakCandidates.begin() : 0);
size_t softPeak = (haveSoftPeak ? *softPeakCandidates.begin() : 0);
Peak peak;
peak.hard = false;
peak.chunk = softPeak;
bool ignore = false;
if (haveHardPeak &&
(!haveSoftPeak || hardPeak <= softPeak)) {
if (m_debugLevel > 2) {
std::cerr << "Hard peak: " << hardPeak << std::endl;
}
peak.hard = true;
peak.chunk = hardPeak;
hardPeakCandidates.erase(hardPeakCandidates.begin());
} else {
if (m_debugLevel > 2) {
std::cerr << "Soft peak: " << softPeak << std::endl;
}
if (!peaks.empty() &&
peaks[peaks.size()-1].hard &&
peaks[peaks.size()-1].chunk + 3 >= softPeak) {
if (m_debugLevel > 2) {
std::cerr << "(ignoring, as we just had a hard peak)"
<< std::endl;
}
ignore = true;
}
}
if (haveSoftPeak && peak.chunk == softPeak) {
softPeakCandidates.erase(softPeakCandidates.begin());
}
peaks.push_back(peak);
if (!ignore) {
peaks.push_back(peak);
}
}
return peaks;
@@ -551,10 +563,6 @@ StretchCalculator::smoothDF(const std::vector<float> &df)
total += df[i]; ++count;
if (i+1 < df.size()) { total += df[i+1]; ++count; }
float mean = total / count;
// if (isnan(mean)) {
// std::cerr << "ERROR: mean at " << i << " (of " << df.size() << ") is NaN: dfs are: "
// << df[i-1] << ", " << df[i] << ", " << df[i+1] << std::endl;
// }
smoothedDF.push_back(mean);
}
@@ -574,9 +582,9 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
// the region, we should set all the values up to that point to
// the same value as the peak.
//!!! this is not subtle enough, especially if the region is long
//-- we want a bound that corresponds to acoustic perception of
//the audible bounce
// (This might not be subtle enough, especially if the region is
// long -- we want a bound that corresponds to acoustic perception
// of the audible bounce.)
for (size_t i = 1; i < df.size()/2; ++i) {
if (df[i] < df[i-1]) {
@@ -600,16 +608,15 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
// tending back towards the maximum df, so that the stretchiness
// reduces at the end of the stretched region.
int reducedRegion = (0.1 * m_sampleRate) / m_increment;
if (reducedRegion > df.size()/5) reducedRegion = df.size()/5;
int reducedRegion = lrint((0.1 * m_sampleRate) / m_increment);
if (reducedRegion > int(df.size()/5)) reducedRegion = df.size()/5;
for (size_t i = 0; i < reducedRegion; ++i) {
for (int i = 0; i < reducedRegion; ++i) {
size_t index = df.size() - reducedRegion + i;
df[index] = df[index] + ((maxDf - df[index]) * i) / reducedRegion;
}
long toAllot = long(duration) - long(m_increment * df.size());
// bool negative = (toAllot < 0);
if (m_debugLevel > 1) {
std::cerr << "region of " << df.size() << " chunks, output duration " << duration << ", toAllot " << toAllot << std::endl;
@@ -617,20 +624,25 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
size_t totalIncrement = 0;
//!!! we need to place limits on the amount of displacement per
//chunk. if ratio < 0, no increment should be larger than
//increment*ratio or smaller than increment*ratio/2; if ratio > 0,
//none should be smaller than increment*ratio or larger than
//increment*ratio*2. We need to enforce this in the assignment of
//displacements to allotments, not by trying to respond if
//something turns out wrong
// We place limits on the amount of displacement per chunk. if
// ratio < 0, no increment should be larger than increment*ratio
// or smaller than increment*ratio/2; if ratio > 0, none should be
// smaller than increment*ratio or larger than increment*ratio*2.
// We need to enforce this in the assignment of displacements to
// allotments, not by trying to respond if something turns out
// wrong.
//!!! ratio is only provided to this function for the purposes of
//establishing this bound to the displacement
// Note that the ratio is only provided to this function for the
// purposes of establishing this bound to the displacement.
// so if maxDisplacement / totalDisplacement > increment * ratio*2 - increment (for ratio > 1)
// or maxDisplacement / totalDisplacement < increment * ratio/2 (for ratio < 1)
// then we need to adjust... what?
// so if
// maxDisplacement / totalDisplacement > increment * ratio*2 - increment
// (for ratio > 1)
// or
// maxDisplacement / totalDisplacement < increment * ratio/2
// (for ratio < 1)
// then we need to adjust and accommodate
bool acceptableSquashRange = false;
@@ -663,7 +675,7 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
int extremeIncrement = m_increment + lrint((toAllot * maxDisplacement) / totalDisplacement);
if (ratio < 1.0) {
if (extremeIncrement > lrint(ceil(m_increment * ratio))) {
std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (I thought this couldn't happen?)" << std::endl;
std::cerr << "ERROR: extreme increment " << extremeIncrement << " > " << m_increment * ratio << " (this should not happen)" << std::endl;
} else if (extremeIncrement < (m_increment * ratio) / 2) {
if (m_debugLevel > 0) {
std::cerr << "WARNING: extreme increment " << extremeIncrement << " < " << (m_increment * ratio) / 2 << std::endl;
@@ -684,19 +696,8 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
if (!acceptableSquashRange) {
// Need to make maxDisplacement smaller as a proportion of
// the total displacement, yet ensure that the
// displacements still sum to the total. How?
// std::cerr << "Adjusting df values by " << maxDf/10 << "..." << std::endl;
// std::cerr << "now: ";
// for (size_t i = 0; i < df.size(); ++i) {
// df[i] += maxDf/10;
// std::cerr << df[i] << " ";
// }
// std::cerr << std::endl;
// displacements still sum to the total.
adj += maxDf/10;
//...
}
}
@@ -729,7 +730,9 @@ StretchCalculator::distributeRegion(const std::vector<float> &dfIn,
int increment = m_increment + allotment;
if (increment <= 0) {
//!!! this is a serious problem, the allocation is quite wrong if it allows increment to diverge so far from the input increment
// this is a serious problem, the allocation is quite
// wrong if it allows increment to diverge so far from the
// input increment
std::cerr << "*** WARNING: increment " << increment << " <= 0, rounding to zero" << std::endl;
increment = 0;
allotment = increment - m_increment;