Overlap/add fixes. Some phase problems still here

This commit is contained in:
Chris Cannam
2022-05-24 12:00:54 +01:00
parent cd0ee3e4f6
commit 2b401e5cbe
6 changed files with 86 additions and 32 deletions

View File

@@ -43,6 +43,7 @@ StretchCalculator::StretchCalculator(size_t sampleRate,
m_prevDf(0), m_prevDf(0),
m_prevRatio(1.0), m_prevRatio(1.0),
m_prevTimeRatio(1.0), m_prevTimeRatio(1.0),
m_justReset(true),
m_transientAmnesty(0), m_transientAmnesty(0),
m_debugLevel(0), m_debugLevel(0),
m_useHardPeaks(useHardPeaks), m_useHardPeaks(useHardPeaks),
@@ -371,7 +372,9 @@ StretchCalculator::calculateSingle(double timeRatio,
// / pitchScale if resampling is happening after stretching). So // / pitchScale if resampling is happening after stretching). So
// the overall ratio is timeRatio / effectivePitchRatio. // the overall ratio is timeRatio / effectivePitchRatio.
bool ratioChanged = (ratio != m_prevRatio); bool ratioChanged = (!m_justReset) && (ratio != m_prevRatio);
m_justReset = false;
if (ratioChanged) { if (ratioChanged) {
// Reset our frame counters from the ratio change. // Reset our frame counters from the ratio change.
@@ -535,6 +538,8 @@ StretchCalculator::reset()
m_outFrameCounter = 0.0; m_outFrameCounter = 0.0;
m_transientAmnesty = 0; m_transientAmnesty = 0;
m_keyFrameMap.clear(); m_keyFrameMap.clear();
m_justReset = true;
} }
std::vector<StretchCalculator::Peak> std::vector<StretchCalculator::Peak>

View File

@@ -98,6 +98,7 @@ protected:
float m_prevDf; float m_prevDf;
double m_prevRatio; double m_prevRatio;
double m_prevTimeRatio; double m_prevTimeRatio;
bool m_justReset;
int m_transientAmnesty; // only in RT mode; handled differently offline int m_transientAmnesty; // only in RT mode; handled differently offline
int m_debugLevel; int m_debugLevel;
bool m_useHardPeaks; bool m_useHardPeaks;

View File

@@ -24,6 +24,9 @@
#ifndef RUBBERBAND_GUIDE_H #ifndef RUBBERBAND_GUIDE_H
#define RUBBERBAND_GUIDE_H #define RUBBERBAND_GUIDE_H
#include <functional>
#include <sstream>
namespace RubberBand namespace RubberBand
{ {
@@ -86,24 +89,31 @@ public:
}; };
struct Configuration { struct Configuration {
int classificationFftSize;
int longestFftSize; int longestFftSize;
int shortestFftSize;
int classificationFftSize;
BandLimits fftBandLimits[3]; BandLimits fftBandLimits[3];
Configuration(int _classificationFftSize, int _longestFftSize) : Configuration(int _longestFftSize, int _shortestFftSize,
classificationFftSize(_classificationFftSize), int _classificationFftSize) :
longestFftSize(_longestFftSize) { } longestFftSize(_longestFftSize),
shortestFftSize(_shortestFftSize),
classificationFftSize(_classificationFftSize) { }
}; };
struct Parameters { struct Parameters {
double sampleRate; double sampleRate;
Parameters(double _sampleRate) : std::function<void(const std::string &)> logger;
sampleRate(_sampleRate) { } Parameters(double _sampleRate,
std::function<void(const std::string &)> _log) :
sampleRate(_sampleRate),
logger(_log) { }
}; };
Guide(Parameters parameters) : Guide(Parameters parameters) :
m_parameters(parameters), m_parameters(parameters),
m_configuration(roundUp(int(ceil(parameters.sampleRate / 32.0))), m_configuration(roundUp(int(ceil(parameters.sampleRate / 16.0))),
roundUp(int(ceil(parameters.sampleRate / 16.0)))), roundUp(int(ceil(parameters.sampleRate / 64.0))),
roundUp(int(ceil(parameters.sampleRate / 32.0)))),
m_defaultLower(700.0), m_defaultHigher(4800.0), m_defaultLower(700.0), m_defaultHigher(4800.0),
m_maxLower(1100.0), m_maxHigher(7000.0) m_maxLower(1100.0), m_maxHigher(7000.0)
{ {
@@ -216,6 +226,22 @@ public:
guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio); guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio);
guidance.phaseLockBands[3].f0 = higher; guidance.phaseLockBands[3].f0 = higher;
guidance.phaseLockBands[3].f1 = nyquist; guidance.phaseLockBands[3].f1 = nyquist;
std::ostringstream str;
str << "Guidance: FFT bands: ["
<< guidance.fftBands[0].fftSize << " from "
<< guidance.fftBands[0].f0 << " to " << guidance.fftBands[0].f1
<< ", "
<< guidance.fftBands[1].fftSize << " from "
<< guidance.fftBands[1].f0 << " to " << guidance.fftBands[1].f1
<< ", "
<< guidance.fftBands[2].fftSize << " from "
<< guidance.fftBands[2].f0 << " to " << guidance.fftBands[2].f1
<< "]; phase reset range: ["
<< guidance.phaseReset.present << " from "
<< guidance.phaseReset.f0 << " to " << guidance.phaseReset.f1
<< "]" << std::endl;
m_parameters.logger(str.str());
} }
protected: protected:

View File

@@ -96,15 +96,18 @@ public:
(configuration.fftBandLimits[myFftBand].f0min); (configuration.fftBandLimits[myFftBand].f0min);
int highest = binForFrequency int highest = binForFrequency
(configuration.fftBandLimits[myFftBand].f1max); (configuration.fftBandLimits[myFftBand].f1max);
if (!m_reported) { if (!m_reported) {
std::ostringstream ostr; std::ostringstream ostr;
ostr << "PhaseAdvance: fftSize = " << m_parameters.fftSize ostr << "PhaseAdvance: fftSize = " << m_parameters.fftSize
<< ": bins = " << bs << ", channels = " << channels << ": bins = " << bs << ", channels = " << channels
<< ", inhop = "<< inhop << ", outhop = " << outhop << ", inhop = "<< inhop << ", outhop = " << outhop
<< ", ratio = " << ratio << std::endl; << ", ratio = " << ratio << std::endl;
ostr << "PhaseAdvance: lowest possible = " << lowest ostr << "PhaseAdvance: lowest possible bin = " << lowest
<< "Hz, highest = " << highest << "Hz" << std::endl; << " (" << configuration.fftBandLimits[myFftBand].f0min
<< "Hz), highest = " << highest
<< " (" << configuration.fftBandLimits[myFftBand].f1max
<< "Hz)" << std::endl;
m_parameters.logger(ostr.str()); m_parameters.logger(ostr.str());
m_reported = true; m_reported = true;
} }
@@ -160,9 +163,10 @@ public:
++phaseLockBand; ++phaseLockBand;
} }
double ph = 0.0; double ph = 0.0;
/*
if (inRange(f, g->phaseReset) || inRange(f, g->kick)) { if (inRange(f, g->phaseReset) || inRange(f, g->kick)) {
ph = phase[c][i]; ph = phase[c][i];
} else if (inRange (f, g->highPercussive)) { } else */ if (inRange (f, g->highPercussive)) {
ph = m_unlocked[c][i]; ph = m_unlocked[c][i];
} else { } else {
int peak = m_currentPeaks[c][i]; int peak = m_currentPeaks[c][i];

View File

@@ -176,6 +176,8 @@ R3StretcherImpl::consume()
int longest = m_guideConfiguration.longestFftSize; int longest = m_guideConfiguration.longestFftSize;
int classify = m_guideConfiguration.classificationFftSize; int classify = m_guideConfiguration.classificationFftSize;
m_calculator->setDebugLevel(3);
int outhop = m_calculator->calculateSingle(ratio, int outhop = m_calculator->calculateSingle(ratio,
1.0 / m_pitchScale, 1.0 / m_pitchScale,
1.f, 1.f,
@@ -183,6 +185,8 @@ R3StretcherImpl::consume()
longest, longest,
longest); longest);
std::cout << "outhop = " << outhop << std::endl;
double instantaneousRatio = double(outhop) / double(m_inhop); double instantaneousRatio = double(outhop) / double(m_inhop);
while (m_channelData.at(0)->outbuf->getWriteSpace() >= outhop) { while (m_channelData.at(0)->outbuf->getWriteSpace() >= outhop) {
@@ -292,7 +296,6 @@ R3StretcherImpl::consume()
int fftSize = band.fftSize; int fftSize = band.fftSize;
auto scale = cd->scales.at(fftSize); auto scale = cd->scales.at(fftSize);
auto scaleData = m_scaleData.at(fftSize); auto scaleData = m_scaleData.at(fftSize);
double factor = m_parameters.sampleRate / double(fftSize);
//!!! messy and v slow, but leave it until we've //!!! messy and v slow, but leave it until we've
//!!! discovered whether we need a window accumulator //!!! discovered whether we need a window accumulator
@@ -307,9 +310,11 @@ R3StretcherImpl::consume()
} }
winscale = float(outhop) / winscale; winscale = float(outhop) / winscale;
double factor = m_parameters.sampleRate / double(fftSize);
for (int i = 0; i < fftSize/2 + 1; ++i) { for (int i = 0; i < fftSize/2 + 1; ++i) {
double f = double(i) * factor; double f = double(i) * factor;
if (f >= band.f0 && f < band.f1) { if (f >= band.f0 && f < band.f1) {
//!!! check the mod 2 bit from stretch-fn
scale->mag[i] *= winscale; scale->mag[i] *= winscale;
} else { } else {
scale->mag[i] = 0.f; scale->mag[i] = 0.f;
@@ -321,29 +326,41 @@ R3StretcherImpl::consume()
int fftSize = it.first; int fftSize = it.first;
auto scale = it.second; auto scale = it.second;
auto scaleData = m_scaleData.at(fftSize); auto scaleData = m_scaleData.at(fftSize);
int bufSize = scale->bufSize;
scaleData->fft.inversePolar(scale->mag.data(), scaleData->fft.inversePolar(scale->mag.data(),
scale->phase.data(), scale->phase.data(),
scale->timeDomainFrame.data()); scale->timeDomainFrame.data());
int synthesisWindowSize = scaleData->synthesisWindow.getSize(); int synthesisWindowSize = scaleData->synthesisWindow.getSize();
int offset = (fftSize - synthesisWindowSize) / 2; int fromOffset = (fftSize - synthesisWindowSize) / 2;
scaleData->synthesisWindow.cutAndAdd int toOffset = (m_guideConfiguration.longestFftSize -
(scale->timeDomainFrame.data() + offset, synthesisWindowSize) / 2;
scale->accumulator.data());
} scaleData->synthesisWindow.cutAndAdd
(scale->timeDomainFrame.data() + fromOffset,
scale->accumulator.data() + toOffset);
}
auto mixptr = cd->mixdown.data();
v_zero(mixptr, outhop);
v_zero(cd->mixdown.data(), outhop);
for (auto it : cd->scales) { for (auto it : cd->scales) {
auto scale = it.second; auto scale = it.second;
auto &acc = scale->accumulator; v_add(mixptr, scale->accumulator.data(), outhop);
v_add(cd->mixdown.data(), acc.data(), outhop);
int n = acc.size() - outhop;
v_move(acc.data(), acc.data() + outhop, n);
v_zero(acc.data() + n, outhop);
} }
cd->outbuf->write(cd->mixdown.data(), outhop);
cd->outbuf->write(mixptr, outhop);
for (auto it : cd->scales) {
int fftSize = it.first;
auto scale = it.second;
auto accptr = scale->accumulator.data();
int n = scale->accumulator.size() - outhop;
v_move(accptr, accptr + outhop, n);
v_zero(accptr + n, outhop);
}
if (readSpace < m_inhop) { if (readSpace < m_inhop) {
// This should happen only when draining // This should happen only when draining
cd->inbuf->skip(readSpace); cd->inbuf->skip(readSpace);

View File

@@ -60,7 +60,7 @@ public:
m_parameters(parameters), m_parameters(parameters),
m_timeRatio(initialTimeRatio), m_timeRatio(initialTimeRatio),
m_pitchScale(initialPitchScale), m_pitchScale(initialPitchScale),
m_guide(Guide::Parameters(m_parameters.sampleRate)), m_guide(Guide::Parameters(m_parameters.sampleRate, parameters.logger)),
m_guideConfiguration(m_guide.getConfiguration()), m_guideConfiguration(m_guide.getConfiguration()),
m_channelAssembly(m_parameters.channels), m_channelAssembly(m_parameters.channels),
m_troughPicker(m_guideConfiguration.classificationFftSize / 2 + 1), m_troughPicker(m_guideConfiguration.classificationFftSize / 2 + 1),
@@ -84,7 +84,8 @@ public:
for (auto band: m_guideConfiguration.fftBandLimits) { for (auto band: m_guideConfiguration.fftBandLimits) {
int fftSize = band.fftSize; int fftSize = band.fftSize;
m_channelData[c]->scales[fftSize] = m_channelData[c]->scales[fftSize] =
std::make_shared<ChannelScaleData>(fftSize); std::make_shared<ChannelScaleData>
(fftSize, m_guideConfiguration.longestFftSize);
} }
} }
@@ -135,7 +136,7 @@ protected:
FixedVector<double> prevOutPhase; FixedVector<double> prevOutPhase;
FixedVector<float> accumulator; FixedVector<float> accumulator;
ChannelScaleData(int _fftSize) : ChannelScaleData(int _fftSize, int _longestFftSize) :
fftSize(_fftSize), fftSize(_fftSize),
bufSize(fftSize/2 + 1), bufSize(fftSize/2 + 1),
timeDomainFrame(fftSize, 0.f), timeDomainFrame(fftSize, 0.f),
@@ -145,7 +146,7 @@ protected:
nextTroughs(bufSize, 0), nextTroughs(bufSize, 0),
prevMag(bufSize, 0.f), prevMag(bufSize, 0.f),
prevOutPhase(bufSize, 0.f), prevOutPhase(bufSize, 0.f),
accumulator(fftSize, 0.f) accumulator(_longestFftSize, 0.f)
{ } { }
private: private:
@@ -170,7 +171,7 @@ protected:
segmenter(new BinSegmenter(segmenterParameters, segmenter(new BinSegmenter(segmenterParameters,
classifierParameters)), classifierParameters)),
segmentation(), prevSegmentation(), nextSegmentation(), segmentation(), prevSegmentation(), nextSegmentation(),
mixdown(ringBufferSize, 0.f), //!!! could be much shorter (bound is the max outhop) mixdown(ringBufferSize, 0.f), //!!! could be shorter (bound is the max fft size I think)
inbuf(new RingBuffer<float>(ringBufferSize)), inbuf(new RingBuffer<float>(ringBufferSize)),
outbuf(new RingBuffer<float>(ringBufferSize)) { } outbuf(new RingBuffer<float>(ringBufferSize)) { }
}; };