Overlap-add fixes. Some phase problems still remain.

This commit is contained in:
Chris Cannam
2022-05-24 12:00:54 +01:00
parent cd0ee3e4f6
commit 2b401e5cbe
6 changed files with 86 additions and 32 deletions

View File

@@ -43,6 +43,7 @@ StretchCalculator::StretchCalculator(size_t sampleRate,
m_prevDf(0),
m_prevRatio(1.0),
m_prevTimeRatio(1.0),
m_justReset(true),
m_transientAmnesty(0),
m_debugLevel(0),
m_useHardPeaks(useHardPeaks),
@@ -371,7 +372,9 @@ StretchCalculator::calculateSingle(double timeRatio,
// / pitchScale if resampling is happening after stretching). So
// the overall ratio is timeRatio / effectivePitchRatio.
bool ratioChanged = (ratio != m_prevRatio);
bool ratioChanged = (!m_justReset) && (ratio != m_prevRatio);
m_justReset = false;
if (ratioChanged) {
// Reset our frame counters from the ratio change.
@@ -535,6 +538,8 @@ StretchCalculator::reset()
m_outFrameCounter = 0.0;
m_transientAmnesty = 0;
m_keyFrameMap.clear();
m_justReset = true;
}
std::vector<StretchCalculator::Peak>

View File

@@ -98,6 +98,7 @@ protected:
float m_prevDf;
double m_prevRatio;
double m_prevTimeRatio;
bool m_justReset;
int m_transientAmnesty; // only in RT mode; handled differently offline
int m_debugLevel;
bool m_useHardPeaks;

View File

@@ -24,6 +24,9 @@
#ifndef RUBBERBAND_GUIDE_H
#define RUBBERBAND_GUIDE_H
#include <functional>
#include <sstream>
namespace RubberBand
{
@@ -86,24 +89,31 @@ public:
};
struct Configuration {
int classificationFftSize;
int longestFftSize;
int shortestFftSize;
int classificationFftSize;
BandLimits fftBandLimits[3];
Configuration(int _classificationFftSize, int _longestFftSize) :
classificationFftSize(_classificationFftSize),
longestFftSize(_longestFftSize) { }
Configuration(int _longestFftSize, int _shortestFftSize,
int _classificationFftSize) :
longestFftSize(_longestFftSize),
shortestFftSize(_shortestFftSize),
classificationFftSize(_classificationFftSize) { }
};
struct Parameters {
double sampleRate;
Parameters(double _sampleRate) :
sampleRate(_sampleRate) { }
std::function<void(const std::string &)> logger;
Parameters(double _sampleRate,
std::function<void(const std::string &)> _log) :
sampleRate(_sampleRate),
logger(_log) { }
};
Guide(Parameters parameters) :
m_parameters(parameters),
m_configuration(roundUp(int(ceil(parameters.sampleRate / 32.0))),
roundUp(int(ceil(parameters.sampleRate / 16.0)))),
m_configuration(roundUp(int(ceil(parameters.sampleRate / 16.0))),
roundUp(int(ceil(parameters.sampleRate / 64.0))),
roundUp(int(ceil(parameters.sampleRate / 32.0)))),
m_defaultLower(700.0), m_defaultHigher(4800.0),
m_maxLower(1100.0), m_maxHigher(7000.0)
{
@@ -216,6 +226,22 @@ public:
guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio);
guidance.phaseLockBands[3].f0 = higher;
guidance.phaseLockBands[3].f1 = nyquist;
std::ostringstream str;
str << "Guidance: FFT bands: ["
<< guidance.fftBands[0].fftSize << " from "
<< guidance.fftBands[0].f0 << " to " << guidance.fftBands[0].f1
<< ", "
<< guidance.fftBands[1].fftSize << " from "
<< guidance.fftBands[1].f0 << " to " << guidance.fftBands[1].f1
<< ", "
<< guidance.fftBands[2].fftSize << " from "
<< guidance.fftBands[2].f0 << " to " << guidance.fftBands[2].f1
<< "]; phase reset range: ["
<< guidance.phaseReset.present << " from "
<< guidance.phaseReset.f0 << " to " << guidance.phaseReset.f1
<< "]" << std::endl;
m_parameters.logger(str.str());
}
protected:

View File

@@ -96,15 +96,18 @@ public:
(configuration.fftBandLimits[myFftBand].f0min);
int highest = binForFrequency
(configuration.fftBandLimits[myFftBand].f1max);
if (!m_reported) {
std::ostringstream ostr;
ostr << "PhaseAdvance: fftSize = " << m_parameters.fftSize
<< ": bins = " << bs << ", channels = " << channels
<< ", inhop = "<< inhop << ", outhop = " << outhop
<< ", ratio = " << ratio << std::endl;
ostr << "PhaseAdvance: lowest possible = " << lowest
<< "Hz, highest = " << highest << "Hz" << std::endl;
ostr << "PhaseAdvance: lowest possible bin = " << lowest
<< " (" << configuration.fftBandLimits[myFftBand].f0min
<< "Hz), highest = " << highest
<< " (" << configuration.fftBandLimits[myFftBand].f1max
<< "Hz)" << std::endl;
m_parameters.logger(ostr.str());
m_reported = true;
}
@@ -160,9 +163,10 @@ public:
++phaseLockBand;
}
double ph = 0.0;
/*
if (inRange(f, g->phaseReset) || inRange(f, g->kick)) {
ph = phase[c][i];
} else if (inRange (f, g->highPercussive)) {
} else */ if (inRange (f, g->highPercussive)) {
ph = m_unlocked[c][i];
} else {
int peak = m_currentPeaks[c][i];

View File

@@ -176,6 +176,8 @@ R3StretcherImpl::consume()
int longest = m_guideConfiguration.longestFftSize;
int classify = m_guideConfiguration.classificationFftSize;
m_calculator->setDebugLevel(3);
int outhop = m_calculator->calculateSingle(ratio,
1.0 / m_pitchScale,
1.f,
@@ -183,6 +185,8 @@ R3StretcherImpl::consume()
longest,
longest);
std::cout << "outhop = " << outhop << std::endl;
double instantaneousRatio = double(outhop) / double(m_inhop);
while (m_channelData.at(0)->outbuf->getWriteSpace() >= outhop) {
@@ -292,7 +296,6 @@ R3StretcherImpl::consume()
int fftSize = band.fftSize;
auto scale = cd->scales.at(fftSize);
auto scaleData = m_scaleData.at(fftSize);
double factor = m_parameters.sampleRate / double(fftSize);
//!!! messy and v slow, but leave it until we've
//!!! discovered whether we need a window accumulator
@@ -307,9 +310,11 @@ R3StretcherImpl::consume()
}
winscale = float(outhop) / winscale;
double factor = m_parameters.sampleRate / double(fftSize);
for (int i = 0; i < fftSize/2 + 1; ++i) {
double f = double(i) * factor;
if (f >= band.f0 && f < band.f1) {
//!!! check the mod 2 bit from stretch-fn
scale->mag[i] *= winscale;
} else {
scale->mag[i] = 0.f;
@@ -321,29 +326,41 @@ R3StretcherImpl::consume()
int fftSize = it.first;
auto scale = it.second;
auto scaleData = m_scaleData.at(fftSize);
int bufSize = scale->bufSize;
scaleData->fft.inversePolar(scale->mag.data(),
scale->phase.data(),
scale->timeDomainFrame.data());
int synthesisWindowSize = scaleData->synthesisWindow.getSize();
int offset = (fftSize - synthesisWindowSize) / 2;
scaleData->synthesisWindow.cutAndAdd
(scale->timeDomainFrame.data() + offset,
scale->accumulator.data());
}
int fromOffset = (fftSize - synthesisWindowSize) / 2;
int toOffset = (m_guideConfiguration.longestFftSize -
synthesisWindowSize) / 2;
scaleData->synthesisWindow.cutAndAdd
(scale->timeDomainFrame.data() + fromOffset,
scale->accumulator.data() + toOffset);
}
auto mixptr = cd->mixdown.data();
v_zero(mixptr, outhop);
v_zero(cd->mixdown.data(), outhop);
for (auto it : cd->scales) {
auto scale = it.second;
auto &acc = scale->accumulator;
v_add(cd->mixdown.data(), acc.data(), outhop);
int n = acc.size() - outhop;
v_move(acc.data(), acc.data() + outhop, n);
v_zero(acc.data() + n, outhop);
v_add(mixptr, scale->accumulator.data(), outhop);
}
cd->outbuf->write(cd->mixdown.data(), outhop);
cd->outbuf->write(mixptr, outhop);
for (auto it : cd->scales) {
int fftSize = it.first;
auto scale = it.second;
auto accptr = scale->accumulator.data();
int n = scale->accumulator.size() - outhop;
v_move(accptr, accptr + outhop, n);
v_zero(accptr + n, outhop);
}
if (readSpace < m_inhop) {
// This should happen only when draining
cd->inbuf->skip(readSpace);

View File

@@ -60,7 +60,7 @@ public:
m_parameters(parameters),
m_timeRatio(initialTimeRatio),
m_pitchScale(initialPitchScale),
m_guide(Guide::Parameters(m_parameters.sampleRate)),
m_guide(Guide::Parameters(m_parameters.sampleRate, parameters.logger)),
m_guideConfiguration(m_guide.getConfiguration()),
m_channelAssembly(m_parameters.channels),
m_troughPicker(m_guideConfiguration.classificationFftSize / 2 + 1),
@@ -84,7 +84,8 @@ public:
for (auto band: m_guideConfiguration.fftBandLimits) {
int fftSize = band.fftSize;
m_channelData[c]->scales[fftSize] =
std::make_shared<ChannelScaleData>(fftSize);
std::make_shared<ChannelScaleData>
(fftSize, m_guideConfiguration.longestFftSize);
}
}
@@ -135,7 +136,7 @@ protected:
FixedVector<double> prevOutPhase;
FixedVector<float> accumulator;
ChannelScaleData(int _fftSize) :
ChannelScaleData(int _fftSize, int _longestFftSize) :
fftSize(_fftSize),
bufSize(fftSize/2 + 1),
timeDomainFrame(fftSize, 0.f),
@@ -145,7 +146,7 @@ protected:
nextTroughs(bufSize, 0),
prevMag(bufSize, 0.f),
prevOutPhase(bufSize, 0.f),
accumulator(fftSize, 0.f)
accumulator(_longestFftSize, 0.f)
{ }
private:
@@ -170,7 +171,7 @@ protected:
segmenter(new BinSegmenter(segmenterParameters,
classifierParameters)),
segmentation(), prevSegmentation(), nextSegmentation(),
mixdown(ringBufferSize, 0.f), //!!! could be much shorter (bound is the max outhop)
mixdown(ringBufferSize, 0.f), //!!! could be shorter (bound is the max fft size I think)
inbuf(new RingBuffer<float>(ringBufferSize)),
outbuf(new RingBuffer<float>(ringBufferSize)) { }
};