2007-11-06 21:41:16 +00:00
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
2012-09-09 16:57:42 +01:00
Rubber Band Library
2007-11-06 21:41:16 +00:00
An audio time - stretching and pitch - shifting library .
2022-01-04 17:50:15 +00:00
Copyright 2007 - 2022 Particular Programs Ltd .
2012-09-09 16:57:42 +01:00
2007-11-06 21:41:16 +00:00
This program is free software ; you can redistribute it and / or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation ; either version 2 of the
License , or ( at your option ) any later version . See the file
COPYING included with this distribution for more information .
2012-09-09 16:57:42 +01:00
Alternatively , if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders , you may redistribute and / or modify it under the terms
described in that licence .
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License ,
you must obtain a valid commercial licence before doing so .
2007-11-06 21:41:16 +00:00
*/
# include "StretchCalculator.h"
# include <math.h>
# include <iostream>
# include <deque>
# include <set>
# include <cassert>
2008-05-22 16:54:27 +00:00
# include <algorithm>
2022-05-23 17:59:40 +01:00
# include "sysutils.h"
2007-11-06 21:41:16 +00:00
namespace RubberBand
{
StretchCalculator : : StretchCalculator ( size_t sampleRate ,
size_t inputIncrement ,
2022-06-21 16:06:16 +01:00
bool useHardPeaks ,
Log log ) :
2007-11-06 21:41:16 +00:00
m_sampleRate ( sampleRate ) ,
m_increment ( inputIncrement ) ,
m_prevDf ( 0 ) ,
m_prevRatio ( 1.0 ) ,
2021-05-13 18:04:43 +01:00
m_prevTimeRatio ( 1.0 ) ,
2022-05-24 12:00:54 +01:00
m_justReset ( true ) ,
2007-11-20 20:17:13 +00:00
m_transientAmnesty ( 0 ) ,
2014-09-03 17:11:07 +01:00
m_debugLevel ( 0 ) ,
2021-05-13 18:04:43 +01:00
m_useHardPeaks ( useHardPeaks ) ,
m_inFrameCounter ( 0 ) ,
m_frameCheckpoint ( 0 , 0 ) ,
2022-06-21 16:06:16 +01:00
m_outFrameCounter ( 0 ) ,
m_log ( log )
2007-11-06 21:41:16 +00:00
{
2007-11-20 20:17:13 +00:00
// std::cerr << "StretchCalculator::StretchCalculator: useHardPeaks = " << useHardPeaks << std::endl;
2007-11-06 21:41:16 +00:00
}
StretchCalculator : : ~ StretchCalculator ( )
{
}
2010-03-24 09:44:51 +00:00
void
StretchCalculator : : setKeyFrameMap ( const std : : map < size_t , size_t > & mapping )
{
m_keyFrameMap = mapping ;
// Ensure we always have a 0 -> 0 mapping. If there's nothing in
// the map at all, don't need to worry about this (empty map is
// handled separately anyway)
if ( ! m_keyFrameMap . empty ( ) ) {
if ( m_keyFrameMap . find ( 0 ) = = m_keyFrameMap . end ( ) ) {
m_keyFrameMap [ 0 ] = 0 ;
}
}
}
2007-11-06 21:41:16 +00:00
std : : vector < int >
StretchCalculator : : calculate ( double ratio , size_t inputDuration ,
2022-05-18 14:12:57 +01:00
const std : : vector < float > & phaseResetDf )
2007-11-06 21:41:16 +00:00
{
2010-03-24 09:44:51 +00:00
m_peaks = findPeaks ( phaseResetDf ) ;
2007-11-06 21:41:16 +00:00
2010-03-24 09:44:51 +00:00
size_t totalCount = phaseResetDf . size ( ) ;
2007-11-06 21:41:16 +00:00
size_t outputDuration = lrint ( inputDuration * ratio ) ;
if ( m_debugLevel > 0 ) {
std : : cerr < < " StretchCalculator::calculate(): inputDuration " < < inputDuration < < " , ratio " < < ratio < < " , outputDuration " < < outputDuration ;
}
2007-11-19 20:13:39 +00:00
outputDuration = lrint ( ( phaseResetDf . size ( ) * m_increment ) * ratio ) ;
2007-11-06 21:41:16 +00:00
if ( m_debugLevel > 0 ) {
std : : cerr < < " (rounded up to " < < outputDuration < < " ) " ;
2010-03-24 09:44:51 +00:00
std : : cerr < < " , df size " < < phaseResetDf . size ( ) < < " , increment "
< < m_increment < < std : : endl ;
2007-11-06 21:41:16 +00:00
}
2010-03-24 09:44:51 +00:00
std : : vector < Peak > peaks ; // peak position (in chunks) and hardness
std : : vector < size_t > targets ; // targets for mapping peaks (in samples)
mapPeaks ( peaks , targets , outputDuration , totalCount ) ;
2007-11-06 21:41:16 +00:00
if ( m_debugLevel > 1 ) {
std : : cerr < < " have " < < peaks . size ( ) < < " fixed positions " < < std : : endl ;
}
size_t totalInput = 0 , totalOutput = 0 ;
2010-03-24 09:44:51 +00:00
std : : vector < int > increments ;
2007-11-06 21:41:16 +00:00
for ( size_t i = 0 ; i < = peaks . size ( ) ; + + i ) {
2007-11-18 21:38:18 +00:00
size_t regionStart , regionStartChunk , regionEnd , regionEndChunk ;
2007-11-20 20:17:13 +00:00
bool phaseReset = false ;
2007-11-06 21:41:16 +00:00
if ( i = = 0 ) {
2007-11-18 21:38:18 +00:00
regionStartChunk = 0 ;
2007-11-06 21:41:16 +00:00
regionStart = 0 ;
} else {
2007-11-18 21:38:18 +00:00
regionStartChunk = peaks [ i - 1 ] . chunk ;
2010-03-24 09:44:51 +00:00
regionStart = targets [ i - 1 ] ;
2007-11-20 20:17:13 +00:00
phaseReset = peaks [ i - 1 ] . hard ;
2007-11-06 21:41:16 +00:00
}
if ( i = = peaks . size ( ) ) {
2010-03-24 09:44:51 +00:00
// std::cerr << "note: i (=" << i << ") == peaks.size(); regionEndChunk " << regionEndChunk << " -> " << totalCount << ", regionEnd " << regionEnd << " -> " << outputDuration << std::endl;
2007-11-18 21:38:18 +00:00
regionEndChunk = totalCount ;
2007-11-06 21:41:16 +00:00
regionEnd = outputDuration ;
} else {
2007-11-18 21:38:18 +00:00
regionEndChunk = peaks [ i ] . chunk ;
2010-03-24 09:44:51 +00:00
regionEnd = targets [ i ] ;
2007-11-06 21:41:16 +00:00
}
2022-05-18 14:12:57 +01:00
2010-03-24 09:44:51 +00:00
if ( regionStartChunk > totalCount ) regionStartChunk = totalCount ;
if ( regionStart > outputDuration ) regionStart = outputDuration ;
if ( regionEndChunk > totalCount ) regionEndChunk = totalCount ;
if ( regionEnd > outputDuration ) regionEnd = outputDuration ;
2007-11-06 21:41:16 +00:00
2022-05-18 14:12:57 +01:00
if ( regionEndChunk < regionStartChunk ) regionEndChunk = regionStartChunk ;
if ( regionEnd < regionStart ) regionEnd = regionStart ;
2007-11-06 21:41:16 +00:00
2022-05-18 14:12:57 +01:00
size_t regionDuration = regionEnd - regionStart ;
2007-11-06 21:41:16 +00:00
2022-05-18 14:12:57 +01:00
size_t nchunks = regionEndChunk - regionStartChunk ;
2022-06-13 17:16:03 +01:00
if ( m_debugLevel > 1 ) {
std : : cerr < < " region from " < < regionStartChunk < < " to " < < regionEndChunk < < " (samples " < < regionStart < < " to " < < regionEnd < < " ) " < < std : : endl ;
}
2007-11-06 21:41:16 +00:00
2022-05-18 14:12:57 +01:00
if ( nchunks = = 0 ) {
2022-06-13 17:16:03 +01:00
if ( m_debugLevel > 1 ) {
std : : cerr < < " note: nchunks == 0 " < < std : : endl ;
}
continue ;
2022-05-18 14:12:57 +01:00
}
2007-11-06 21:41:16 +00:00
2022-05-18 14:12:57 +01:00
double per = double ( regionDuration ) / double ( nchunks ) ;
double acc = 0.0 ;
size_t nremaining = nchunks ;
2007-11-06 21:41:16 +00:00
size_t totalForRegion = 0 ;
2022-05-18 14:12:57 +01:00
if ( phaseReset ) {
size_t incr ;
if ( nchunks > 1 ) {
incr = m_increment ;
if ( incr > regionDuration ) {
incr = regionDuration ;
}
} else {
incr = regionDuration ;
}
increments . push_back ( - int64_t ( incr ) ) ;
per = double ( regionDuration - incr ) / double ( nchunks - 1 ) ;
acc + = incr ;
totalForRegion + = incr ;
2007-11-06 21:41:16 +00:00
totalInput + = m_increment ;
2022-05-18 14:12:57 +01:00
nremaining = nremaining - 1 ;
2007-11-06 21:41:16 +00:00
}
2022-05-18 14:12:57 +01:00
if ( nremaining > 0 ) {
for ( size_t j = 0 ; j + 1 < nremaining ; + + j ) {
acc + = per ;
size_t incr = size_t ( round ( acc - totalForRegion ) ) ;
increments . push_back ( incr ) ;
totalForRegion + = incr ;
totalInput + = m_increment ;
}
if ( regionDuration > totalForRegion ) {
size_t final = regionDuration - totalForRegion ;
increments . push_back ( final ) ;
totalForRegion + = final ;
totalInput + = m_increment ;
}
2007-11-06 21:41:16 +00:00
}
totalOutput + = totalForRegion ;
}
if ( m_debugLevel > 0 ) {
2007-11-21 21:48:16 +00:00
std : : cerr < < " total input increment = " < < totalInput < < " (= " < < totalInput / m_increment < < " chunks), output = " < < totalOutput < < " , ratio = " < < double ( totalOutput ) / double ( totalInput ) < < " , ideal output " < < size_t ( ceil ( totalInput * ratio ) ) < < std : : endl ;
2007-11-06 21:41:16 +00:00
}
2007-11-20 20:17:13 +00:00
2007-11-06 21:41:16 +00:00
return increments ;
}
2010-03-24 09:44:51 +00:00
void
StretchCalculator : : mapPeaks ( std : : vector < Peak > & peaks ,
std : : vector < size_t > & targets ,
size_t outputDuration ,
size_t totalCount )
{
// outputDuration is in audio samples; totalCount is in chunks
if ( m_keyFrameMap . empty ( ) ) {
// "normal" behaviour -- fixed points are strictly in
// proportion
peaks = m_peaks ;
for ( size_t i = 0 ; i < peaks . size ( ) ; + + i ) {
targets . push_back
( lrint ( ( double ( peaks [ i ] . chunk ) * outputDuration ) / totalCount ) ) ;
}
return ;
}
// We have been given a set of source -> target sample frames in
// m_keyFrameMap. We want to ensure that (to the nearest chunk) these
// are followed exactly, and any fixed points that we calculated
// ourselves are interpolated in linear proportion in between.
size_t peakidx = 0 ;
std : : map < size_t , size_t > : : const_iterator mi = m_keyFrameMap . begin ( ) ;
// NB we know for certain we have a mapping from 0 -> 0 (or at
// least, some mapping for source sample 0) because that is
2015-06-16 08:51:55 +01:00
// enforced in setKeyFrameMap above. However, we aren't guaranteed
2010-03-24 09:44:51 +00:00
// to have a mapping for the total duration -- we will usually
// need to assume it maps to the normal duration * ratio sample
while ( mi ! = m_keyFrameMap . end ( ) ) {
// std::cerr << "mi->first is " << mi->first << ", second is " << mi->second <<std::endl;
// The map we've been given is from sample to sample, but
// we can only map from chunk to sample. We should perhaps
// adjust the target sample to compensate for the discrepancy
// between the chunk position and the exact requested source
// sample. But we aren't doing that yet.
size_t sourceStartChunk = mi - > first / m_increment ;
size_t sourceEndChunk = totalCount ;
size_t targetStartSample = mi - > second ;
size_t targetEndSample = outputDuration ;
+ + mi ;
if ( mi ! = m_keyFrameMap . end ( ) ) {
sourceEndChunk = mi - > first / m_increment ;
targetEndSample = mi - > second ;
}
if ( sourceStartChunk > = totalCount | |
sourceStartChunk > = sourceEndChunk | |
targetStartSample > = outputDuration | |
targetStartSample > = targetEndSample ) {
std : : cerr < < " NOTE: ignoring mapping from chunk " < < sourceStartChunk < < " to sample " < < targetStartSample < < " \n (source or target chunk exceeds total count, or end is not later than start) " < < std : : endl ;
continue ;
}
// one peak and target for the mapping, then one for each of
// the computed peaks that appear before the following mapping
Peak p ;
p . chunk = sourceStartChunk ;
p . hard = false ; // mappings are in time only, not phase reset points
peaks . push_back ( p ) ;
targets . push_back ( targetStartSample ) ;
if ( m_debugLevel > 1 ) {
std : : cerr < < " mapped chunk " < < sourceStartChunk < < " (frame " < < sourceStartChunk * m_increment < < " ) -> " < < targetStartSample < < std : : endl ;
}
while ( peakidx < m_peaks . size ( ) ) {
size_t pchunk = m_peaks [ peakidx ] . chunk ;
if ( pchunk < sourceStartChunk ) {
// shouldn't happen, should have been dealt with
// already -- but no harm in ignoring it explicitly
+ + peakidx ;
continue ;
}
if ( pchunk = = sourceStartChunk ) {
// convert that last peak to a hard one, after all
peaks [ peaks . size ( ) - 1 ] . hard = true ;
+ + peakidx ;
continue ;
}
if ( pchunk > = sourceEndChunk ) {
// leave the rest for after the next mapping
break ;
}
p . chunk = pchunk ;
p . hard = m_peaks [ peakidx ] . hard ;
double proportion =
double ( pchunk - sourceStartChunk ) /
double ( sourceEndChunk - sourceStartChunk ) ;
size_t target =
targetStartSample +
lrint ( proportion *
( targetEndSample - targetStartSample ) ) ;
if ( target < = targets [ targets . size ( ) - 1 ] + m_increment ) {
// peaks will become too close together afterwards, ignore
+ + peakidx ;
continue ;
}
if ( m_debugLevel > 1 ) {
std : : cerr < < " peak chunk " < < pchunk < < " (frame " < < pchunk * m_increment < < " ) -> " < < target < < std : : endl ;
}
peaks . push_back ( p ) ;
targets . push_back ( target ) ;
+ + peakidx ;
}
}
}
2021-05-13 18:04:43 +01:00
int64_t
StretchCalculator : : expectedOutFrame ( int64_t inFrame , double timeRatio )
{
int64_t checkpointedAt = m_frameCheckpoint . first ;
int64_t checkpointed = m_frameCheckpoint . second ;
return int64_t ( round ( checkpointed + ( inFrame - checkpointedAt ) * timeRatio ) ) ;
}
2007-11-06 21:41:16 +00:00
int
2021-05-13 18:04:43 +01:00
StretchCalculator : : calculateSingle ( double timeRatio ,
double effectivePitchRatio ,
2008-05-22 16:54:27 +00:00
float df ,
2021-05-13 18:04:43 +01:00
size_t inIncrement ,
size_t analysisWindowSize ,
2022-06-13 17:16:03 +01:00
size_t synthesisWindowSize ,
bool alignFrameStarts )
2007-11-06 21:41:16 +00:00
{
2021-05-13 18:04:43 +01:00
double ratio = timeRatio / effectivePitchRatio ;
int increment = int ( inIncrement ) ;
2008-05-22 16:54:27 +00:00
if ( increment = = 0 ) increment = m_increment ;
2021-05-13 18:04:43 +01:00
int outIncrement = lrint ( increment * ratio ) ; // the normal case
2007-11-06 21:41:16 +00:00
bool isTransient = false ;
2021-05-13 18:04:43 +01:00
2007-11-20 20:17:13 +00:00
// We want to ensure, as close as possible, that the phase reset
2021-05-13 18:04:43 +01:00
// points appear at the right audio frame numbers. To this end we
// track the incoming frame number, its corresponding expected
// output frame number, and the actual output frame number
// projected based on the ratios provided.
//
// There are two subtleties:
//
// (1) on a ratio change, we need to checkpoint the expected
// output frame number reached so far and start counting again
// with the new ratio. We could do this with a reset to zero, but
// it's easier to reason about absolute input/output frame
// matches, so for the moment at least we're doing this by
// explicitly checkpointing the current numbers (hence the use of
// the above expectedOutFrame() function which refers to the
// last checkpointed values).
//
// (2) in the case of a pitch shift in a configuration where
// resampling occurs after stretching, all of our output
// increments will be effectively modified by resampling after we
// return. This is why we separate out timeRatio and
// effectivePitchRatio arguments - the former is the ratio that
// has already been applied and the latter is the ratio that will
// be applied by any subsequent resampling step (which will be 1.0
// / pitchScale if resampling is happening after stretching). So
// the overall ratio is timeRatio / effectivePitchRatio.
2022-05-24 12:00:54 +01:00
bool ratioChanged = ( ! m_justReset ) & & ( ratio ! = m_prevRatio ) ;
m_justReset = false ;
2021-05-13 18:04:43 +01:00
if ( ratioChanged ) {
// Reset our frame counters from the ratio change.
// m_outFrameCounter tracks the frames counted at output from
// this function, which normally precedes resampling - hence
// the use of timeRatio rather than ratio here
if ( m_debugLevel > 1 ) {
std : : cerr < < " StretchCalculator: ratio changed from " < < m_prevRatio < < " to " < < ratio < < std : : endl ;
}
2007-11-20 20:17:13 +00:00
2021-05-13 18:04:43 +01:00
int64_t toCheckpoint = expectedOutFrame
( m_inFrameCounter , m_prevTimeRatio ) ;
m_frameCheckpoint =
std : : pair < int64_t , int64_t > ( m_inFrameCounter , toCheckpoint ) ;
}
m_prevRatio = ratio ;
m_prevTimeRatio = timeRatio ;
if ( m_debugLevel > 2 ) {
std : : cerr < < " StretchCalculator::calculateSingle: timeRatio = "
< < timeRatio < < " , effectivePitchRatio = "
< < effectivePitchRatio < < " (that's 1.0 / "
< < ( 1.0 / effectivePitchRatio )
< < " ), ratio = " < < ratio < < " , df = " < < df
< < " , inIncrement = " < < inIncrement
< < " , default outIncrement = " < < outIncrement
< < " , analysisWindowSize = " < < analysisWindowSize
< < " , synthesisWindowSize = " < < synthesisWindowSize
< < std : : endl ;
std : : cerr < < " inFrameCounter = " < < m_inFrameCounter
< < " , outFrameCounter = " < < m_outFrameCounter
< < std : : endl ;
std : : cerr < < " The next sample out is input sample " < < m_inFrameCounter < < std : : endl ;
}
2022-06-13 17:16:03 +01:00
int64_t intended , projected ;
if ( alignFrameStarts ) { // R3
intended = expectedOutFrame ( m_inFrameCounter , timeRatio ) ;
2022-06-14 17:03:33 +01:00
projected =
int64_t ( round ( m_outFrameCounter ) ) ;
2022-06-13 17:16:03 +01:00
} else { // R2
intended = expectedOutFrame
( m_inFrameCounter + analysisWindowSize / 4 , timeRatio ) ;
projected =
int64_t ( round ( m_outFrameCounter +
( synthesisWindowSize / 4 * effectivePitchRatio ) ) ) ;
}
2021-05-14 08:46:37 +01:00
int64_t divergence = projected - intended ;
2021-05-13 18:04:43 +01:00
if ( m_debugLevel > 2 ) {
2021-05-14 08:46:37 +01:00
std : : cerr < < " for current frame + quarter frame: intended " < < intended < < " , projected " < < projected < < " , divergence " < < divergence < < std : : endl ;
2021-05-13 18:04:43 +01:00
}
2007-11-20 20:17:13 +00:00
// In principle, the threshold depends on chunk size: larger chunk
// sizes need higher thresholds. Since chunk size depends on
// ratio, I suppose we could in theory calculate the threshold
// from the ratio directly. For the moment we're happy if it
// works well in common situations.
2007-11-06 21:41:16 +00:00
2008-05-22 16:54:27 +00:00
float transientThreshold = 0.35f ;
2010-03-24 09:44:51 +00:00
// if (ratio > 1) transientThreshold = 0.25f;
2007-11-06 21:41:16 +00:00
2008-05-22 16:54:27 +00:00
if ( m_useHardPeaks & & df > m_prevDf * 1.1f & & df > transientThreshold ) {
2021-05-14 14:24:54 +01:00
if ( divergence > 1000 | | divergence < - 1000 ) {
if ( m_debugLevel > 1 ) {
std : : cerr < < " StretchCalculator::calculateSingle: transient, but we're not permitting it because the divergence ( " < < divergence < < " ) is too great " < < std : : endl ;
}
} else {
isTransient = true ;
}
2007-11-06 21:41:16 +00:00
}
if ( m_debugLevel > 2 ) {
std : : cerr < < " df = " < < df < < " , prevDf = " < < m_prevDf
< < " , thresh = " < < transientThreshold < < std : : endl ;
}
m_prevDf = df ;
2021-05-13 18:04:43 +01:00
if ( m_transientAmnesty > 0 ) {
if ( isTransient ) {
if ( m_debugLevel > 1 ) {
std : : cerr < < " StretchCalculator::calculateSingle: transient, but we have an amnesty (df " < < df < < " , threshold " < < transientThreshold < < " ) " < < std : : endl ;
}
isTransient = false ;
}
- - m_transientAmnesty ;
}
if ( isTransient ) {
2007-11-06 21:41:16 +00:00
if ( m_debugLevel > 1 ) {
2021-05-13 18:04:43 +01:00
std : : cerr < < " StretchCalculator::calculateSingle: transient at (df " < < df < < " , threshold " < < transientThreshold < < " ) " < < std : : endl ;
2007-11-06 21:41:16 +00:00
}
2007-11-20 20:17:13 +00:00
// as in offline mode, 0.05 sec approx min between transients
m_transientAmnesty =
2008-05-22 16:54:27 +00:00
lrint ( ceil ( double ( m_sampleRate ) / ( 20 * double ( increment ) ) ) ) ;
2007-11-20 20:17:13 +00:00
2021-05-13 18:04:43 +01:00
outIncrement = increment ;
2007-11-06 21:41:16 +00:00
2021-05-13 18:04:43 +01:00
} else {
2007-11-06 21:41:16 +00:00
2021-05-14 08:46:37 +01:00
double recovery = 0.0 ;
if ( divergence > 1000 | | divergence < - 1000 ) {
recovery = divergence / ( ( m_sampleRate / 10.0 ) / increment ) ;
} else if ( divergence > 100 | | divergence < - 100 ) {
2021-05-14 11:35:08 +01:00
recovery = divergence / ( ( m_sampleRate / 20.0 ) / increment ) ;
2021-05-14 08:46:37 +01:00
} else {
recovery = divergence / 4.0 ;
2021-05-13 18:04:43 +01:00
}
2007-11-06 21:41:16 +00:00
2021-05-14 08:46:37 +01:00
int incr = lrint ( outIncrement - recovery ) ;
if ( m_debugLevel > 2 | | ( m_debugLevel > 1 & & divergence ! = 0 ) ) {
std : : cerr < < " divergence = " < < divergence < < " , recovery = " < < recovery < < " , incr = " < < incr < < " , " ;
2021-05-13 18:04:43 +01:00
}
2021-05-14 14:24:54 +01:00
int minIncr = lrint ( increment * ratio * 0.3 ) ;
2021-05-14 08:46:37 +01:00
int maxIncr = lrint ( increment * ratio * 2 ) ;
if ( incr < minIncr ) {
incr = minIncr ;
} else if ( incr > maxIncr ) {
incr = maxIncr ;
2021-05-13 18:04:43 +01:00
}
2021-05-14 08:46:37 +01:00
if ( m_debugLevel > 2 | | ( m_debugLevel > 1 & & divergence ! = 0 ) ) {
std : : cerr < < " clamped into [ " < < minIncr < < " , " < < maxIncr
< < " ] becomes " < < incr < < std : : endl ;
2021-05-13 18:04:43 +01:00
}
2007-11-06 21:41:16 +00:00
2021-05-13 18:04:43 +01:00
if ( incr < 0 ) {
std : : cerr < < " WARNING: internal error: incr < 0 in calculateSingle "
< < std : : endl ;
outIncrement = 0 ;
} else {
outIncrement = incr ;
}
2007-11-06 21:41:16 +00:00
}
2021-05-13 18:04:43 +01:00
if ( m_debugLevel > 1 ) {
std : : cerr < < " StretchCalculator::calculateSingle: returning isTransient = "
< < isTransient < < " , outIncrement = " < < outIncrement
< < std : : endl ;
2007-11-06 21:41:16 +00:00
}
2021-05-13 18:04:43 +01:00
m_inFrameCounter + = inIncrement ;
m_outFrameCounter + = outIncrement * effectivePitchRatio ;
if ( isTransient ) {
return - outIncrement ;
} else {
return outIncrement ;
}
2007-11-06 21:41:16 +00:00
}
void
StretchCalculator : : reset ( )
{
m_prevDf = 0 ;
2017-09-26 09:51:22 +01:00
m_prevRatio = 1.0 ;
2021-05-14 08:46:37 +01:00
m_prevTimeRatio = 1.0 ;
m_inFrameCounter = 0 ;
m_frameCheckpoint = std : : pair < int64_t , int64_t > ( 0 , 0 ) ;
m_outFrameCounter = 0.0 ;
2017-09-26 09:51:22 +01:00
m_transientAmnesty = 0 ;
m_keyFrameMap . clear ( ) ;
2022-05-24 12:00:54 +01:00
m_justReset = true ;
2007-11-06 21:41:16 +00:00
}
std : : vector < StretchCalculator : : Peak >
StretchCalculator : : findPeaks ( const std : : vector < float > & rawDf )
{
std : : vector < float > df = smoothDF ( rawDf ) ;
2007-11-20 20:17:13 +00:00
// We distinguish between "soft" and "hard" peaks. A soft peak is
// simply the result of peak-picking on the smoothed onset
// detection function, and it represents any (strong-ish) onset.
// We aim to ensure always that soft peaks are placed at the
// correct position in time. A hard peak is where there is a very
// rapid rise in detection function, and it presumably represents
// a more broadband, noisy transient. For these we perform a
// phase reset (if in the appropriate mode), and we locate the
// reset at the first point where we notice enough of a rapid
// rise, rather than necessarily at the peak itself, in order to
// preserve the shape of the transient.
2007-11-06 21:41:16 +00:00
std : : set < size_t > hardPeakCandidates ;
std : : set < size_t > softPeakCandidates ;
if ( m_useHardPeaks ) {
2007-11-20 20:17:13 +00:00
// 0.05 sec approx min between hard peaks
2007-11-06 21:41:16 +00:00
size_t hardPeakAmnesty = lrint ( ceil ( double ( m_sampleRate ) /
2007-11-20 20:17:13 +00:00
( 20 * double ( m_increment ) ) ) ) ;
2007-11-06 21:41:16 +00:00
size_t prevHardPeak = 0 ;
2007-11-20 20:17:13 +00:00
if ( m_debugLevel > 1 ) {
std : : cerr < < " hardPeakAmnesty = " < < hardPeakAmnesty < < std : : endl ;
}
for ( size_t i = 1 ; i + 1 < df . size ( ) ; + + i ) {
2007-11-06 21:41:16 +00:00
if ( df [ i ] < 0.1 ) continue ;
2007-11-20 20:17:13 +00:00
if ( df [ i ] < = df [ i - 1 ] * 1.1 ) continue ;
if ( df [ i ] < 0.22 ) continue ;
if ( ! hardPeakCandidates . empty ( ) & &
i < prevHardPeak + hardPeakAmnesty ) {
continue ;
}
bool hard = ( df [ i ] > 0.4 ) ;
if ( hard & & ( m_debugLevel > 1 ) ) {
std : : cerr < < " hard peak at " < < i < < " : " < < df [ i ]
< < " > absolute " < < 0.4
< < std : : endl ;
}
if ( ! hard ) {
hard = ( df [ i ] > df [ i - 1 ] * 1.4 ) ;
if ( hard & & ( m_debugLevel > 1 ) ) {
std : : cerr < < " hard peak at " < < i < < " : " < < df [ i ]
< < " > prev " < < df [ i - 1 ] < < " * 1.4 "
< < std : : endl ;
2007-11-06 21:41:16 +00:00
}
2007-11-20 20:17:13 +00:00
}
if ( ! hard & & i > 1 ) {
hard = ( df [ i ] > df [ i - 1 ] * 1.2 & &
df [ i - 1 ] > df [ i - 2 ] * 1.2 ) ;
if ( hard & & ( m_debugLevel > 1 ) ) {
std : : cerr < < " hard peak at " < < i < < " : " < < df [ i ]
< < " > prev " < < df [ i - 1 ] < < " * 1.2 and "
< < df [ i - 1 ] < < " > prev " < < df [ i - 2 ] < < " * 1.2 "
< < std : : endl ;
}
}
if ( ! hard & & i > 2 ) {
// have already established that df[i] > df[i-1] * 1.1
hard = ( df [ i ] > 0.3 & &
df [ i - 1 ] > df [ i - 2 ] * 1.1 & &
df [ i - 2 ] > df [ i - 3 ] * 1.1 ) ;
if ( hard & & ( m_debugLevel > 1 ) ) {
std : : cerr < < " hard peak at " < < i < < " : " < < df [ i ]
< < " > prev " < < df [ i - 1 ] < < " * 1.1 and "
< < df [ i - 1 ] < < " > prev " < < df [ i - 2 ] < < " * 1.1 and "
< < df [ i - 2 ] < < " > prev " < < df [ i - 3 ] < < " * 1.1 "
< < std : : endl ;
2007-11-06 21:41:16 +00:00
}
2007-11-20 20:17:13 +00:00
}
if ( ! hard ) continue ;
// (df[i+1] > df[i] && df[i+1] > df[i-1] * 1.8) ||
// df[i] > 0.4) {
size_t peakLocation = i ;
if ( i + 1 < rawDf . size ( ) & &
rawDf [ i + 1 ] > rawDf [ i ] * 1.4 ) {
+ + peakLocation ;
2007-11-06 21:41:16 +00:00
if ( m_debugLevel > 1 ) {
2007-11-20 20:17:13 +00:00
std : : cerr < < " pushing hard peak forward to " < < peakLocation < < " : " < < df [ peakLocation ] < < " > " < < df [ peakLocation - 1 ] < < " * " < < 1.4 < < std : : endl ;
2007-11-06 21:41:16 +00:00
}
}
2007-11-20 20:17:13 +00:00
hardPeakCandidates . insert ( peakLocation ) ;
prevHardPeak = peakLocation ;
2007-11-06 21:41:16 +00:00
}
}
size_t medianmaxsize = lrint ( ceil ( double ( m_sampleRate ) /
double ( m_increment ) ) ) ; // 1 sec ish
if ( m_debugLevel > 1 ) {
std : : cerr < < " mediansize = " < < medianmaxsize < < std : : endl ;
}
if ( medianmaxsize < 7 ) {
medianmaxsize = 7 ;
if ( m_debugLevel > 1 ) {
std : : cerr < < " adjusted mediansize = " < < medianmaxsize < < std : : endl ;
}
}
int minspacing = lrint ( ceil ( double ( m_sampleRate ) /
( 20 * double ( m_increment ) ) ) ) ; // 0.05 sec ish
std : : deque < float > medianwin ;
std : : vector < float > sorted ;
int softPeakAmnesty = 0 ;
for ( size_t i = 0 ; i < medianmaxsize / 2 ; + + i ) {
medianwin . push_back ( 0 ) ;
}
for ( size_t i = 0 ; i < medianmaxsize / 2 & & i < df . size ( ) ; + + i ) {
medianwin . push_back ( df [ i ] ) ;
}
size_t lastSoftPeak = 0 ;
for ( size_t i = 0 ; i < df . size ( ) ; + + i ) {
size_t mediansize = medianmaxsize ;
if ( medianwin . size ( ) < mediansize ) {
mediansize = medianwin . size ( ) ;
}
2007-11-08 10:42:50 +00:00
size_t middle = medianmaxsize / 2 ;
if ( middle > = mediansize ) middle = mediansize - 1 ;
size_t nextDf = i + mediansize - middle ;
2007-11-06 21:41:16 +00:00
if ( mediansize < 2 ) {
2007-11-08 10:42:50 +00:00
if ( mediansize > medianmaxsize ) { // absurd, but never mind that
2007-11-06 21:41:16 +00:00
medianwin . pop_front ( ) ;
}
2007-11-08 10:42:50 +00:00
if ( nextDf < df . size ( ) ) {
medianwin . push_back ( df [ nextDf ] ) ;
} else {
medianwin . push_back ( 0 ) ;
}
2007-11-06 21:41:16 +00:00
continue ;
}
if ( m_debugLevel > 2 ) {
// std::cerr << "have " << mediansize << " in median buffer" << std::endl;
}
sorted . clear ( ) ;
for ( size_t j = 0 ; j < mediansize ; + + j ) {
sorted . push_back ( medianwin [ j ] ) ;
}
std : : sort ( sorted . begin ( ) , sorted . end ( ) ) ;
size_t n = 90 ; // percentile above which we pick peaks
size_t index = ( sorted . size ( ) * n ) / 100 ;
if ( index > = sorted . size ( ) ) index = sorted . size ( ) - 1 ;
if ( index = = sorted . size ( ) - 1 & & index > 0 ) - - index ;
float thresh = sorted [ index ] ;
2007-11-20 20:17:13 +00:00
// if (m_debugLevel > 2) {
2007-11-08 10:42:50 +00:00
// std::cerr << "medianwin[" << middle << "] = " << medianwin[middle] << ", thresh = " << thresh << std::endl;
2007-11-20 20:17:13 +00:00
// if (medianwin[middle] == 0.f) {
2007-11-08 10:42:50 +00:00
// std::cerr << "contents: ";
2007-11-20 20:17:13 +00:00
// for (size_t j = 0; j < medianwin.size(); ++j) {
2007-11-08 10:42:50 +00:00
// std::cerr << medianwin[j] << " ";
2007-11-20 20:17:13 +00:00
// }
2007-11-08 10:42:50 +00:00
// std::cerr << std::endl;
2007-11-20 20:17:13 +00:00
// }
// }
2007-11-06 21:41:16 +00:00
if ( medianwin [ middle ] > thresh & &
medianwin [ middle ] > medianwin [ middle - 1 ] & &
medianwin [ middle ] > medianwin [ middle + 1 ] & &
softPeakAmnesty = = 0 ) {
size_t maxindex = middle ;
float maxval = medianwin [ middle ] ;
for ( size_t j = middle + 1 ; j < mediansize ; + + j ) {
if ( medianwin [ j ] > maxval ) {
maxval = medianwin [ j ] ;
maxindex = j ;
} else if ( medianwin [ j ] < medianwin [ middle ] ) {
break ;
}
}
size_t peak = i + maxindex - middle ;
2007-11-08 10:42:50 +00:00
// std::cerr << "i = " << i << ", maxindex = " << maxindex << ", middle = " << middle << ", so peak at " << peak << std::endl;
2007-11-07 22:34:30 +00:00
2007-11-06 21:41:16 +00:00
if ( softPeakCandidates . empty ( ) | | lastSoftPeak ! = peak ) {
if ( m_debugLevel > 1 ) {
2007-11-20 20:17:13 +00:00
std : : cerr < < " soft peak at " < < peak < < " ( "
< < peak * m_increment < < " ): "
< < medianwin [ middle ] < < " > "
< < thresh < < " and "
< < medianwin [ middle ]
< < " > " < < medianwin [ middle - 1 ] < < " and "
< < medianwin [ middle ]
< < " > " < < medianwin [ middle + 1 ]
2007-11-06 21:41:16 +00:00
< < std : : endl ;
}
if ( peak > = df . size ( ) ) {
if ( m_debugLevel > 2 ) {
std : : cerr < < " peak is beyond end " < < std : : endl ;
}
} else {
softPeakCandidates . insert ( peak ) ;
lastSoftPeak = peak ;
}
}
softPeakAmnesty = minspacing + maxindex - middle ;
if ( m_debugLevel > 2 ) {
std : : cerr < < " amnesty = " < < softPeakAmnesty < < std : : endl ;
}
} else if ( softPeakAmnesty > 0 ) - - softPeakAmnesty ;
2007-11-08 10:42:50 +00:00
if ( mediansize > = medianmaxsize ) {
2007-11-06 21:41:16 +00:00
medianwin . pop_front ( ) ;
}
2007-11-08 10:42:50 +00:00
if ( nextDf < df . size ( ) ) {
medianwin . push_back ( df [ nextDf ] ) ;
} else {
medianwin . push_back ( 0 ) ;
}
2007-11-06 21:41:16 +00:00
}
std : : vector < Peak > peaks ;
while ( ! hardPeakCandidates . empty ( ) | | ! softPeakCandidates . empty ( ) ) {
2007-11-20 20:17:13 +00:00
2007-11-06 21:41:16 +00:00
bool haveHardPeak = ! hardPeakCandidates . empty ( ) ;
bool haveSoftPeak = ! softPeakCandidates . empty ( ) ;
2007-11-20 20:17:13 +00:00
2007-11-06 21:41:16 +00:00
size_t hardPeak = ( haveHardPeak ? * hardPeakCandidates . begin ( ) : 0 ) ;
size_t softPeak = ( haveSoftPeak ? * softPeakCandidates . begin ( ) : 0 ) ;
2007-11-20 20:17:13 +00:00
2007-11-06 21:41:16 +00:00
Peak peak ;
peak . hard = false ;
2007-11-18 21:38:18 +00:00
peak . chunk = softPeak ;
2007-11-20 20:17:13 +00:00
bool ignore = false ;
2007-11-06 21:41:16 +00:00
if ( haveHardPeak & &
( ! haveSoftPeak | | hardPeak < = softPeak ) ) {
2007-11-20 20:17:13 +00:00
2007-11-06 21:41:16 +00:00
if ( m_debugLevel > 2 ) {
std : : cerr < < " Hard peak: " < < hardPeak < < std : : endl ;
}
2007-11-20 20:17:13 +00:00
2007-11-06 21:41:16 +00:00
peak . hard = true ;
2007-11-18 21:38:18 +00:00
peak . chunk = hardPeak ;
2007-11-06 21:41:16 +00:00
hardPeakCandidates . erase ( hardPeakCandidates . begin ( ) ) ;
2007-11-20 20:17:13 +00:00
2007-11-06 21:41:16 +00:00
} else {
if ( m_debugLevel > 2 ) {
std : : cerr < < " Soft peak: " < < softPeak < < std : : endl ;
}
2007-11-20 20:17:13 +00:00
if ( ! peaks . empty ( ) & &
peaks [ peaks . size ( ) - 1 ] . hard & &
peaks [ peaks . size ( ) - 1 ] . chunk + 3 > = softPeak ) {
if ( m_debugLevel > 2 ) {
std : : cerr < < " (ignoring, as we just had a hard peak) "
< < std : : endl ;
}
ignore = true ;
}
2007-11-06 21:41:16 +00:00
}
2007-11-18 21:38:18 +00:00
if ( haveSoftPeak & & peak . chunk = = softPeak ) {
2007-11-06 21:41:16 +00:00
softPeakCandidates . erase ( softPeakCandidates . begin ( ) ) ;
}
2007-11-20 20:17:13 +00:00
if ( ! ignore ) {
peaks . push_back ( peak ) ;
}
2007-11-06 21:41:16 +00:00
}
return peaks ;
}
std : : vector < float >
StretchCalculator : : smoothDF ( const std : : vector < float > & df )
{
std : : vector < float > smoothedDF ;
for ( size_t i = 0 ; i < df . size ( ) ; + + i ) {
// three-value moving mean window for simple smoothing
float total = 0.f , count = 0 ;
if ( i > 0 ) { total + = df [ i - 1 ] ; + + count ; }
total + = df [ i ] ; + + count ;
if ( i + 1 < df . size ( ) ) { total + = df [ i + 1 ] ; + + count ; }
float mean = total / count ;
smoothedDF . push_back ( mean ) ;
}
return smoothedDF ;
}
}