Overlap/add fixes. Some phase problems still remain.
This commit is contained in:
@@ -43,6 +43,7 @@ StretchCalculator::StretchCalculator(size_t sampleRate,
|
||||
m_prevDf(0),
|
||||
m_prevRatio(1.0),
|
||||
m_prevTimeRatio(1.0),
|
||||
m_justReset(true),
|
||||
m_transientAmnesty(0),
|
||||
m_debugLevel(0),
|
||||
m_useHardPeaks(useHardPeaks),
|
||||
@@ -371,7 +372,9 @@ StretchCalculator::calculateSingle(double timeRatio,
|
||||
// / pitchScale if resampling is happening after stretching). So
|
||||
// the overall ratio is timeRatio / effectivePitchRatio.
|
||||
|
||||
bool ratioChanged = (ratio != m_prevRatio);
|
||||
bool ratioChanged = (!m_justReset) && (ratio != m_prevRatio);
|
||||
m_justReset = false;
|
||||
|
||||
if (ratioChanged) {
|
||||
// Reset our frame counters from the ratio change.
|
||||
|
||||
@@ -535,6 +538,8 @@ StretchCalculator::reset()
|
||||
m_outFrameCounter = 0.0;
|
||||
m_transientAmnesty = 0;
|
||||
m_keyFrameMap.clear();
|
||||
|
||||
m_justReset = true;
|
||||
}
|
||||
|
||||
std::vector<StretchCalculator::Peak>
|
||||
|
||||
@@ -98,6 +98,7 @@ protected:
|
||||
float m_prevDf;
|
||||
double m_prevRatio;
|
||||
double m_prevTimeRatio;
|
||||
bool m_justReset;
|
||||
int m_transientAmnesty; // only in RT mode; handled differently offline
|
||||
int m_debugLevel;
|
||||
bool m_useHardPeaks;
|
||||
|
||||
@@ -24,6 +24,9 @@
|
||||
#ifndef RUBBERBAND_GUIDE_H
|
||||
#define RUBBERBAND_GUIDE_H
|
||||
|
||||
#include <functional>
|
||||
#include <sstream>
|
||||
|
||||
namespace RubberBand
|
||||
{
|
||||
|
||||
@@ -86,24 +89,31 @@ public:
|
||||
};
|
||||
|
||||
struct Configuration {
|
||||
int classificationFftSize;
|
||||
int longestFftSize;
|
||||
int shortestFftSize;
|
||||
int classificationFftSize;
|
||||
BandLimits fftBandLimits[3];
|
||||
Configuration(int _classificationFftSize, int _longestFftSize) :
|
||||
classificationFftSize(_classificationFftSize),
|
||||
longestFftSize(_longestFftSize) { }
|
||||
Configuration(int _longestFftSize, int _shortestFftSize,
|
||||
int _classificationFftSize) :
|
||||
longestFftSize(_longestFftSize),
|
||||
shortestFftSize(_shortestFftSize),
|
||||
classificationFftSize(_classificationFftSize) { }
|
||||
};
|
||||
|
||||
struct Parameters {
|
||||
double sampleRate;
|
||||
Parameters(double _sampleRate) :
|
||||
sampleRate(_sampleRate) { }
|
||||
std::function<void(const std::string &)> logger;
|
||||
Parameters(double _sampleRate,
|
||||
std::function<void(const std::string &)> _log) :
|
||||
sampleRate(_sampleRate),
|
||||
logger(_log) { }
|
||||
};
|
||||
|
||||
Guide(Parameters parameters) :
|
||||
m_parameters(parameters),
|
||||
m_configuration(roundUp(int(ceil(parameters.sampleRate / 32.0))),
|
||||
roundUp(int(ceil(parameters.sampleRate / 16.0)))),
|
||||
m_configuration(roundUp(int(ceil(parameters.sampleRate / 16.0))),
|
||||
roundUp(int(ceil(parameters.sampleRate / 64.0))),
|
||||
roundUp(int(ceil(parameters.sampleRate / 32.0)))),
|
||||
m_defaultLower(700.0), m_defaultHigher(4800.0),
|
||||
m_maxLower(1100.0), m_maxHigher(7000.0)
|
||||
{
|
||||
@@ -216,6 +226,22 @@ public:
|
||||
guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio);
|
||||
guidance.phaseLockBands[3].f0 = higher;
|
||||
guidance.phaseLockBands[3].f1 = nyquist;
|
||||
|
||||
std::ostringstream str;
|
||||
str << "Guidance: FFT bands: ["
|
||||
<< guidance.fftBands[0].fftSize << " from "
|
||||
<< guidance.fftBands[0].f0 << " to " << guidance.fftBands[0].f1
|
||||
<< ", "
|
||||
<< guidance.fftBands[1].fftSize << " from "
|
||||
<< guidance.fftBands[1].f0 << " to " << guidance.fftBands[1].f1
|
||||
<< ", "
|
||||
<< guidance.fftBands[2].fftSize << " from "
|
||||
<< guidance.fftBands[2].f0 << " to " << guidance.fftBands[2].f1
|
||||
<< "]; phase reset range: ["
|
||||
<< guidance.phaseReset.present << " from "
|
||||
<< guidance.phaseReset.f0 << " to " << guidance.phaseReset.f1
|
||||
<< "]" << std::endl;
|
||||
m_parameters.logger(str.str());
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
@@ -96,15 +96,18 @@ public:
|
||||
(configuration.fftBandLimits[myFftBand].f0min);
|
||||
int highest = binForFrequency
|
||||
(configuration.fftBandLimits[myFftBand].f1max);
|
||||
|
||||
|
||||
if (!m_reported) {
|
||||
std::ostringstream ostr;
|
||||
ostr << "PhaseAdvance: fftSize = " << m_parameters.fftSize
|
||||
<< ": bins = " << bs << ", channels = " << channels
|
||||
<< ", inhop = "<< inhop << ", outhop = " << outhop
|
||||
<< ", ratio = " << ratio << std::endl;
|
||||
ostr << "PhaseAdvance: lowest possible = " << lowest
|
||||
<< "Hz, highest = " << highest << "Hz" << std::endl;
|
||||
ostr << "PhaseAdvance: lowest possible bin = " << lowest
|
||||
<< " (" << configuration.fftBandLimits[myFftBand].f0min
|
||||
<< "Hz), highest = " << highest
|
||||
<< " (" << configuration.fftBandLimits[myFftBand].f1max
|
||||
<< "Hz)" << std::endl;
|
||||
m_parameters.logger(ostr.str());
|
||||
m_reported = true;
|
||||
}
|
||||
@@ -160,9 +163,10 @@ public:
|
||||
++phaseLockBand;
|
||||
}
|
||||
double ph = 0.0;
|
||||
/*
|
||||
if (inRange(f, g->phaseReset) || inRange(f, g->kick)) {
|
||||
ph = phase[c][i];
|
||||
} else if (inRange (f, g->highPercussive)) {
|
||||
} else */ if (inRange (f, g->highPercussive)) {
|
||||
ph = m_unlocked[c][i];
|
||||
} else {
|
||||
int peak = m_currentPeaks[c][i];
|
||||
|
||||
@@ -176,6 +176,8 @@ R3StretcherImpl::consume()
|
||||
int longest = m_guideConfiguration.longestFftSize;
|
||||
int classify = m_guideConfiguration.classificationFftSize;
|
||||
|
||||
m_calculator->setDebugLevel(3);
|
||||
|
||||
int outhop = m_calculator->calculateSingle(ratio,
|
||||
1.0 / m_pitchScale,
|
||||
1.f,
|
||||
@@ -183,6 +185,8 @@ R3StretcherImpl::consume()
|
||||
longest,
|
||||
longest);
|
||||
|
||||
std::cout << "outhop = " << outhop << std::endl;
|
||||
|
||||
double instantaneousRatio = double(outhop) / double(m_inhop);
|
||||
|
||||
while (m_channelData.at(0)->outbuf->getWriteSpace() >= outhop) {
|
||||
@@ -292,7 +296,6 @@ R3StretcherImpl::consume()
|
||||
int fftSize = band.fftSize;
|
||||
auto scale = cd->scales.at(fftSize);
|
||||
auto scaleData = m_scaleData.at(fftSize);
|
||||
double factor = m_parameters.sampleRate / double(fftSize);
|
||||
|
||||
//!!! messy and v slow, but leave it until we've
|
||||
//!!! discovered whether we need a window accumulator
|
||||
@@ -307,9 +310,11 @@ R3StretcherImpl::consume()
|
||||
}
|
||||
winscale = float(outhop) / winscale;
|
||||
|
||||
double factor = m_parameters.sampleRate / double(fftSize);
|
||||
for (int i = 0; i < fftSize/2 + 1; ++i) {
|
||||
double f = double(i) * factor;
|
||||
if (f >= band.f0 && f < band.f1) {
|
||||
//!!! check the mod 2 bit from stretch-fn
|
||||
scale->mag[i] *= winscale;
|
||||
} else {
|
||||
scale->mag[i] = 0.f;
|
||||
@@ -321,29 +326,41 @@ R3StretcherImpl::consume()
|
||||
int fftSize = it.first;
|
||||
auto scale = it.second;
|
||||
auto scaleData = m_scaleData.at(fftSize);
|
||||
int bufSize = scale->bufSize;
|
||||
|
||||
scaleData->fft.inversePolar(scale->mag.data(),
|
||||
scale->phase.data(),
|
||||
scale->timeDomainFrame.data());
|
||||
|
||||
int synthesisWindowSize = scaleData->synthesisWindow.getSize();
|
||||
int offset = (fftSize - synthesisWindowSize) / 2;
|
||||
scaleData->synthesisWindow.cutAndAdd
|
||||
(scale->timeDomainFrame.data() + offset,
|
||||
scale->accumulator.data());
|
||||
}
|
||||
int fromOffset = (fftSize - synthesisWindowSize) / 2;
|
||||
int toOffset = (m_guideConfiguration.longestFftSize -
|
||||
synthesisWindowSize) / 2;
|
||||
|
||||
scaleData->synthesisWindow.cutAndAdd
|
||||
(scale->timeDomainFrame.data() + fromOffset,
|
||||
scale->accumulator.data() + toOffset);
|
||||
}
|
||||
|
||||
auto mixptr = cd->mixdown.data();
|
||||
v_zero(mixptr, outhop);
|
||||
|
||||
v_zero(cd->mixdown.data(), outhop);
|
||||
for (auto it : cd->scales) {
|
||||
auto scale = it.second;
|
||||
auto &acc = scale->accumulator;
|
||||
v_add(cd->mixdown.data(), acc.data(), outhop);
|
||||
int n = acc.size() - outhop;
|
||||
v_move(acc.data(), acc.data() + outhop, n);
|
||||
v_zero(acc.data() + n, outhop);
|
||||
v_add(mixptr, scale->accumulator.data(), outhop);
|
||||
}
|
||||
cd->outbuf->write(cd->mixdown.data(), outhop);
|
||||
|
||||
cd->outbuf->write(mixptr, outhop);
|
||||
|
||||
for (auto it : cd->scales) {
|
||||
int fftSize = it.first;
|
||||
auto scale = it.second;
|
||||
auto accptr = scale->accumulator.data();
|
||||
|
||||
int n = scale->accumulator.size() - outhop;
|
||||
v_move(accptr, accptr + outhop, n);
|
||||
v_zero(accptr + n, outhop);
|
||||
}
|
||||
|
||||
if (readSpace < m_inhop) {
|
||||
// This should happen only when draining
|
||||
cd->inbuf->skip(readSpace);
|
||||
|
||||
@@ -60,7 +60,7 @@ public:
|
||||
m_parameters(parameters),
|
||||
m_timeRatio(initialTimeRatio),
|
||||
m_pitchScale(initialPitchScale),
|
||||
m_guide(Guide::Parameters(m_parameters.sampleRate)),
|
||||
m_guide(Guide::Parameters(m_parameters.sampleRate, parameters.logger)),
|
||||
m_guideConfiguration(m_guide.getConfiguration()),
|
||||
m_channelAssembly(m_parameters.channels),
|
||||
m_troughPicker(m_guideConfiguration.classificationFftSize / 2 + 1),
|
||||
@@ -84,7 +84,8 @@ public:
|
||||
for (auto band: m_guideConfiguration.fftBandLimits) {
|
||||
int fftSize = band.fftSize;
|
||||
m_channelData[c]->scales[fftSize] =
|
||||
std::make_shared<ChannelScaleData>(fftSize);
|
||||
std::make_shared<ChannelScaleData>
|
||||
(fftSize, m_guideConfiguration.longestFftSize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,7 +136,7 @@ protected:
|
||||
FixedVector<double> prevOutPhase;
|
||||
FixedVector<float> accumulator;
|
||||
|
||||
ChannelScaleData(int _fftSize) :
|
||||
ChannelScaleData(int _fftSize, int _longestFftSize) :
|
||||
fftSize(_fftSize),
|
||||
bufSize(fftSize/2 + 1),
|
||||
timeDomainFrame(fftSize, 0.f),
|
||||
@@ -145,7 +146,7 @@ protected:
|
||||
nextTroughs(bufSize, 0),
|
||||
prevMag(bufSize, 0.f),
|
||||
prevOutPhase(bufSize, 0.f),
|
||||
accumulator(fftSize, 0.f)
|
||||
accumulator(_longestFftSize, 0.f)
|
||||
{ }
|
||||
|
||||
private:
|
||||
@@ -170,7 +171,7 @@ protected:
|
||||
segmenter(new BinSegmenter(segmenterParameters,
|
||||
classifierParameters)),
|
||||
segmentation(), prevSegmentation(), nextSegmentation(),
|
||||
mixdown(ringBufferSize, 0.f), //!!! could be much shorter (bound is the max outhop)
|
||||
mixdown(ringBufferSize, 0.f), //!!! could be shorter (bound is the max fft size I think)
|
||||
inbuf(new RingBuffer<float>(ringBufferSize)),
|
||||
outbuf(new RingBuffer<float>(ringBufferSize)) { }
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user