Add the overlooked fftshift (that explains it!); carry out polar/cartesian conversion only for bins of interest

This commit is contained in:
Chris Cannam
2022-05-24 16:54:05 +01:00
parent c4a78b4b55
commit d45831fcc5
4 changed files with 78 additions and 34 deletions

View File

@@ -78,10 +78,14 @@ public:
int fftSize;
double f0min;
double f1max;
BandLimits(int _fftSize, double _f0min, double _f1max) :
fftSize(_fftSize), f0min(_f0min), f1max(_f1max) { }
int b0min;
int b1max;
BandLimits(int _fftSize, double _rate, double _f0min, double _f1max) :
fftSize(_fftSize), f0min(_f0min), f1max(_f1max),
b0min(int(floor(f0min * fftSize / _rate))),
b1max(int(ceil(f1max * fftSize / _rate))) { }
BandLimits() :
fftSize(0), f0min(0.f), f1max(0.f) { }
fftSize(0), f0min(0.f), f1max(0.f), b0min(0), b1max(0) { }
};
struct Configuration {
@@ -116,15 +120,18 @@ public:
{
double rate = m_parameters.sampleRate;
double nyquist = rate / 2.0;
int bandFftSize = roundUp(int(ceil(rate/16.0)));
m_configuration.fftBandLimits[0] =
BandLimits(roundUp(int(ceil(rate/16.0))),
0.0, m_maxLower);
BandLimits(bandFftSize, rate, 0.0, m_maxLower);
bandFftSize = roundUp(int(ceil(rate/32.0)));
m_configuration.fftBandLimits[1] =
BandLimits(roundUp(int(ceil(rate/32.0))),
m_minLower, m_maxHigher);
BandLimits(bandFftSize, rate, m_minLower, m_maxHigher);
bandFftSize = roundUp(int(ceil(rate/64.0)));
m_configuration.fftBandLimits[2] =
BandLimits(roundUp(int(ceil(rate/64.0))),
m_minHigher, rate/2.0);
BandLimits(bandFftSize, rate, m_minHigher, rate/2.0);
}
const Configuration &getConfiguration() const {
@@ -225,6 +232,7 @@ public:
guidance.phaseLockBands[3].f0 = higher;
guidance.phaseLockBands[3].f1 = nyquist;
/*
std::ostringstream str;
str << "Guidance: FFT bands: ["
<< guidance.fftBands[0].fftSize << " from "
@@ -240,6 +248,7 @@ public:
<< guidance.phaseReset.f0 << " to " << guidance.phaseReset.f1
<< "]" << std::endl;
m_parameters.logger(str.str());
*/
}
protected:

View File

@@ -95,10 +95,8 @@ public:
int channels = m_parameters.channels;
double ratio = double(outhop) / double(inhop);
int lowest = binForFrequency
(configuration.fftBandLimits[myFftBand].f0min);
int highest = binForFrequency
(configuration.fftBandLimits[myFftBand].f1max);
int lowest = configuration.fftBandLimits[myFftBand].b0min;
int highest = configuration.fftBandLimits[myFftBand].b1max;
if (!m_reported) {
std::ostringstream ostr;

View File

@@ -23,6 +23,8 @@
#include "R3StretcherImpl.h"
#include "common/VectorOpsComplex.h"
#include <array>
namespace RubberBand {
@@ -60,6 +62,8 @@ R3StretcherImpl::calculateHop()
m_inhop = int(round(inhop));
}
m_prevOuthop = int(round(m_inhop * ratio));
std::ostringstream str;
str << "R3StretcherImpl::calculateHop: for effective ratio " << ratio
<< " calculated (typical) inhop of " << m_inhop << std::endl;
@@ -187,17 +191,11 @@ R3StretcherImpl::consume()
std::cout << "outhop = " << outhop << std::endl;
//!!!
outhop = int(round(m_inhop * ratio));
//!!! shouldn't this be the *previous* outhop?
// double instantaneousRatio = double(outhop) / double(m_inhop);
double instantaneousRatio = ratio;
double instantaneousRatio = double(m_prevOuthop) / double(m_inhop);
m_prevOuthop = outhop;
while (m_channelData.at(0)->outbuf->getWriteSpace() >= outhop) {
//!!! m_parameters.logger("consume looping");
int readSpace = m_channelData.at(0)->inbuf->getReadSpace();
if (readSpace < longest) {
if (m_draining) {
@@ -213,7 +211,7 @@ R3StretcherImpl::consume()
auto cd = m_channelData.at(c);
auto longestScale = cd->scales.at(longest);
auto buf = longestScale->timeDomainFrame.data();
auto buf = longestScale->timeDomain.data();
if (readSpace < longest) {
v_zero(buf, longest);
@@ -228,7 +226,7 @@ R3StretcherImpl::consume()
if (fftSize == longest) continue;
int offset = (longest - fftSize) / 2;
m_scaleData.at(fftSize)->analysisWindow.cut
(buf + offset, scale->timeDomainFrame.data());
(buf + offset, scale->timeDomain.data());
}
m_scaleData.at(longest)->analysisWindow.cut(buf);
@@ -236,10 +234,26 @@ R3StretcherImpl::consume()
for (auto it: cd->scales) {
int fftSize = it.first;
auto scale = it.second;
m_scaleData.at(fftSize)->fft.forwardPolar
(scale->timeDomainFrame.data(),
scale->mag.data(),
scale->phase.data());
v_fftshift(scale->timeDomain.data(), fftSize);
m_scaleData.at(fftSize)->fft.forward
(scale->timeDomain.data(),
scale->real.data(),
scale->imag.data());
for (const auto &b : m_guideConfiguration.fftBandLimits) {
if (b.fftSize == fftSize) {
int offset = b.b0min;
v_cartesian_to_polar
(scale->mag.data() + offset,
scale->phase.data() + offset,
scale->real.data() + offset,
scale->imag.data() + offset,
b.b1max - offset);
break;
}
}
v_scale(scale->mag.data(), 1.0 / double(fftSize),
scale->mag.size());
}
@@ -328,17 +342,34 @@ R3StretcherImpl::consume()
auto scale = it.second;
auto scaleData = m_scaleData.at(fftSize);
scaleData->fft.inversePolar(scale->mag.data(),
scale->outPhase.data(),
scale->timeDomainFrame.data());
for (const auto &b : m_guideConfiguration.fftBandLimits) {
if (b.fftSize == fftSize) {
int offset = b.b0min;
v_zero(scale->real.data(), fftSize/2 + 1);
v_zero(scale->imag.data(), fftSize/2 + 1);
v_polar_to_cartesian
(scale->real.data() + offset,
scale->imag.data() + offset,
scale->mag.data() + offset,
scale->outPhase.data() + offset,
b.b1max - offset);
break;
}
}
scaleData->fft.inverse(scale->real.data(),
scale->imag.data(),
scale->timeDomain.data());
v_fftshift(scale->timeDomain.data(), fftSize);
int synthesisWindowSize = scaleData->synthesisWindow.getSize();
int fromOffset = (fftSize - synthesisWindowSize) / 2;
int toOffset = (m_guideConfiguration.longestFftSize -
synthesisWindowSize) / 2;
scaleData->synthesisWindow.cutAndAdd
(scale->timeDomainFrame.data() + fromOffset,
(scale->timeDomain.data() + fromOffset,
scale->accumulator.data() + toOffset);
}

View File

@@ -65,6 +65,7 @@ public:
m_channelAssembly(m_parameters.channels),
m_troughPicker(m_guideConfiguration.classificationFftSize / 2 + 1),
m_inhop(1),
m_prevOuthop(1),
m_draining(false)
{
BinSegmenter::Parameters segmenterParameters
@@ -127,7 +128,9 @@ protected:
int fftSize;
int bufSize; // size of every freq-domain array here: fftSize/2 + 1
//!!! review later which of these we are actually using!
FixedVector<double> timeDomainFrame;
FixedVector<double> timeDomain;
FixedVector<double> real;
FixedVector<double> imag;
FixedVector<double> mag;
FixedVector<double> phase;
FixedVector<double> outPhase; //!!! "advanced"?
@@ -139,7 +142,9 @@ protected:
ChannelScaleData(int _fftSize, int _longestFftSize) :
fftSize(_fftSize),
bufSize(fftSize/2 + 1),
timeDomainFrame(fftSize, 0.f),
timeDomain(fftSize, 0.f),
real(bufSize, 0.f),
imag(bufSize, 0.f),
mag(bufSize, 0.f),
phase(bufSize, 0.f),
outPhase(bufSize, 0.f),
@@ -213,6 +218,7 @@ protected:
Peak<double, std::less<double>> m_troughPicker;
std::unique_ptr<StretchCalculator> m_calculator;
int m_inhop;
int m_prevOuthop;
bool m_draining;
void consume();