Experimentally (re-)introduce short window mode

This commit is contained in:
Chris Cannam
2022-07-14 11:55:21 +01:00
parent dc74c993cf
commit 02928a3c86
4 changed files with 109 additions and 26 deletions

View File

@@ -105,7 +105,10 @@ public:
struct Parameters { struct Parameters {
double sampleRate; double sampleRate;
Parameters(double _sampleRate) : sampleRate(_sampleRate) { } bool shortWindowMode;
Parameters(double _sampleRate, bool _shortWindow) :
sampleRate(_sampleRate),
shortWindowMode(_shortWindow) { }
}; };
Guide(Parameters parameters, Log log) : Guide(Parameters parameters, Log log) :
@@ -120,7 +123,14 @@ public:
{ {
double rate = m_parameters.sampleRate; double rate = m_parameters.sampleRate;
m_log.log(1, "Guide: rate", rate); m_log.log(1, "Guide: rate and short-window mode",
rate, m_parameters.shortWindowMode);
if (m_parameters.shortWindowMode) {
m_minLower = 0.0;
m_defaultLower = 0.0;
m_maxLower = 0.0;
}
int bandFftSize = roundUp(int(ceil(rate/16.0))); int bandFftSize = roundUp(int(ceil(rate/16.0)));
m_configuration.fftBandLimits[0] = m_configuration.fftBandLimits[0] =
@@ -301,6 +311,11 @@ public:
guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio); guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio);
guidance.phaseLockBands[3].f0 = higher; guidance.phaseLockBands[3].f0 = higher;
guidance.phaseLockBands[3].f1 = nyquist; guidance.phaseLockBands[3].f1 = nyquist;
if (m_parameters.shortWindowMode) {
guidance.phaseLockBands[1].p = 1;
guidance.phaseLockBands[2].p = 2;
}
if (outhop > 256) { if (outhop > 256) {
guidance.phaseLockBands[3].p = 3; guidance.phaseLockBands[3].p = 3;

View File

@@ -43,8 +43,11 @@ public:
int fftSize; int fftSize;
double sampleRate; double sampleRate;
int channels; int channels;
Parameters(int _fftSize, double _sampleRate, int _channels) : bool shortWindowMode;
fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels) { } Parameters(int _fftSize, double _sampleRate, int _channels,
bool _shortWindow) :
fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels),
shortWindowMode(_shortWindow) { }
}; };
GuidedPhaseAdvance(Parameters parameters, Log log) : GuidedPhaseAdvance(Parameters parameters, Log log) :

View File

@@ -39,7 +39,10 @@ R3Stretcher::R3Stretcher(Parameters parameters,
m_timeRatio(initialTimeRatio), m_timeRatio(initialTimeRatio),
m_pitchScale(initialPitchScale), m_pitchScale(initialPitchScale),
m_formantScale(0.0), m_formantScale(0.0),
m_guide(Guide::Parameters(m_parameters.sampleRate), m_log), m_guide(Guide::Parameters
(m_parameters.sampleRate,
m_parameters.options & RubberBandStretcher::OptionWindowShort),
m_log),
m_guideConfiguration(m_guide.getConfiguration()), m_guideConfiguration(m_guide.getConfiguration()),
m_channelAssembly(m_parameters.channels), m_channelAssembly(m_parameters.channels),
m_inhop(1), m_inhop(1),
@@ -62,6 +65,16 @@ R3Stretcher::R3Stretcher(Parameters parameters,
m_log.log(1, "R3Stretcher::R3Stretcher: initial time ratio and pitch scale", m_log.log(1, "R3Stretcher::R3Stretcher: initial time ratio and pitch scale",
m_timeRatio, m_pitchScale); m_timeRatio, m_pitchScale);
if (isRealTime()) {
m_log.log(1, "R3Stretcher::R3Stretcher: real-time mode");
} else {
m_log.log(1, "R3Stretcher::R3Stretcher: offline mode");
}
if (isShortWindowed()) {
m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested");
}
double maxClassifierFrequency = 16000.0; double maxClassifierFrequency = 16000.0;
if (maxClassifierFrequency > m_parameters.sampleRate/2) { if (maxClassifierFrequency > m_parameters.sampleRate/2) {
maxClassifierFrequency = m_parameters.sampleRate/2; maxClassifierFrequency = m_parameters.sampleRate/2;
@@ -98,7 +111,8 @@ R3Stretcher::R3Stretcher(Parameters parameters,
for (auto band: m_guideConfiguration.fftBandLimits) { for (auto band: m_guideConfiguration.fftBandLimits) {
int fftSize = band.fftSize; int fftSize = band.fftSize;
GuidedPhaseAdvance::Parameters guidedParameters GuidedPhaseAdvance::Parameters guidedParameters
(fftSize, m_parameters.sampleRate, m_parameters.channels); (fftSize, m_parameters.sampleRate, m_parameters.channels,
isShortWindowed());
m_scaleData[fftSize] = std::make_shared<ScaleData> m_scaleData[fftSize] = std::make_shared<ScaleData>
(guidedParameters, m_log); (guidedParameters, m_log);
} }
@@ -130,30 +144,44 @@ R3Stretcher::R3Stretcher(Parameters parameters,
} }
WindowType WindowType
R3Stretcher::ScaleData::analysisWindowShape(int fftSize) R3Stretcher::ScaleData::analysisWindowShape()
{ {
if (fftSize > 2048) return HannWindow; if (shortWindowMode) {
else return NiemitaloForwardWindow; if (fftSize >= 2048) return HannWindow;
else return NiemitaloForwardWindow;
} else {
if (fftSize > 2048) return HannWindow;
else return NiemitaloForwardWindow;
}
} }
int int
R3Stretcher::ScaleData::analysisWindowLength(int fftSize) R3Stretcher::ScaleData::analysisWindowLength()
{ {
return fftSize; return fftSize;
} }
WindowType WindowType
R3Stretcher::ScaleData::synthesisWindowShape(int fftSize) R3Stretcher::ScaleData::synthesisWindowShape()
{ {
if (fftSize > 2048) return HannWindow; if (shortWindowMode) {
else return NiemitaloReverseWindow; if (fftSize >= 2048) return HannWindow;
else return NiemitaloReverseWindow;
} else {
if (fftSize > 2048) return HannWindow;
else return NiemitaloReverseWindow;
}
} }
int int
R3Stretcher::ScaleData::synthesisWindowLength(int fftSize) R3Stretcher::ScaleData::synthesisWindowLength()
{ {
if (fftSize > 2048) return fftSize/2; if (shortWindowMode) {
else return fftSize; return fftSize;
} else {
if (fftSize > 2048) return fftSize/2;
else return fftSize;
}
} }
void void
@@ -292,6 +320,14 @@ R3Stretcher::calculateHop()
if (proposedOuthop > 512.0) proposedOuthop = 512.0; if (proposedOuthop > 512.0) proposedOuthop = 512.0;
if (proposedOuthop < 128.0) proposedOuthop = 128.0; if (proposedOuthop < 128.0) proposedOuthop = 128.0;
if (isShortWindowed()) {
// perhaps ironically, the short window mode actually uses a
// longer synthesis window for the 2048-bin FFT and, since
// reduced CPU consumption is the motivation, it can generally
// survive longer hops
proposedOuthop *= 1.5;
}
m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop); m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop);
double inhop = proposedOuthop / ratio; double inhop = proposedOuthop / ratio;
@@ -728,6 +764,10 @@ R3Stretcher::consume()
for (auto &it : m_channelData[0]->scales) { for (auto &it : m_channelData[0]->scales) {
int fftSize = it.first; int fftSize = it.first;
if (isShortWindowed() &&
fftSize == m_guideConfiguration.longestFftSize) {
continue;
}
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
auto &cd = m_channelData.at(c); auto &cd = m_channelData.at(c);
auto &scale = cd->scales.at(fftSize); auto &scale = cd->scales.at(fftSize);
@@ -898,7 +938,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
} }
// Finally window the longest scale // Finally window the longest scale
m_scaleData.at(longest)->analysisWindow.cut(buf); if (!isShortWindowed()) {
m_scaleData.at(longest)->analysisWindow.cut(buf);
}
// FFT shift, forward FFT, and carry out cartesian-polar // FFT shift, forward FFT, and carry out cartesian-polar
// conversion for each FFT size. // conversion for each FFT size.
@@ -956,9 +998,12 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
if (fftSize == classify && haveValidReadahead) { if (fftSize == classify && haveValidReadahead) {
continue; continue;
} }
if (isShortWindowed() && fftSize == longest) {
continue;
}
auto &scale = it.second; auto &scale = it.second;
v_fftshift(scale->timeDomain.data(), fftSize); v_fftshift(scale->timeDomain.data(), fftSize);
m_scaleData.at(fftSize)->fft.forward(scale->timeDomain.data(), m_scaleData.at(fftSize)->fft.forward(scale->timeDomain.data(),
@@ -1127,6 +1172,11 @@ R3Stretcher::adjustFormant(int c)
for (auto &it : cd->scales) { for (auto &it : cd->scales) {
int fftSize = it.first; int fftSize = it.first;
if (isShortWindowed() &&
fftSize == m_guideConfiguration.longestFftSize) {
continue;
}
auto &scale = it.second; auto &scale = it.second;
int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate)); int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate));
@@ -1156,6 +1206,8 @@ R3Stretcher::adjustFormant(int c)
void void
R3Stretcher::adjustPreKick(int c) R3Stretcher::adjustPreKick(int c)
{ {
if (isShortWindowed()) return;
Profiler profiler("R3Stretcher::adjustPreKick"); Profiler profiler("R3Stretcher::adjustPreKick");
auto &cd = m_channelData.at(c); auto &cd = m_channelData.at(c);
@@ -1197,6 +1249,11 @@ R3Stretcher::synthesiseChannel(int c, int outhop, bool draining)
for (const auto &band : cd->guidance.fftBands) { for (const auto &band : cd->guidance.fftBands) {
int fftSize = band.fftSize; int fftSize = band.fftSize;
if (isShortWindowed() && fftSize == longest) {
continue;
}
auto &scale = cd->scales.at(fftSize); auto &scale = cd->scales.at(fftSize);
auto &scaleData = m_scaleData.at(fftSize); auto &scaleData = m_scaleData.at(fftSize);

View File

@@ -253,19 +253,22 @@ protected:
struct ScaleData { struct ScaleData {
int fftSize; int fftSize;
bool shortWindowMode;
FFT fft; FFT fft;
Window<process_t> analysisWindow; Window<process_t> analysisWindow;
Window<process_t> synthesisWindow; Window<process_t> synthesisWindow;
process_t windowScaleFactor; process_t windowScaleFactor;
GuidedPhaseAdvance guided; GuidedPhaseAdvance guided;
ScaleData(GuidedPhaseAdvance::Parameters guidedParameters, ScaleData(GuidedPhaseAdvance::Parameters guidedParameters,
Log log) : Log log) :
fftSize(guidedParameters.fftSize), fftSize(guidedParameters.fftSize),
shortWindowMode(guidedParameters.shortWindowMode),
fft(fftSize), fft(fftSize),
analysisWindow(analysisWindowShape(fftSize), analysisWindow(analysisWindowShape(),
analysisWindowLength(fftSize)), analysisWindowLength()),
synthesisWindow(synthesisWindowShape(fftSize), synthesisWindow(synthesisWindowShape(),
synthesisWindowLength(fftSize)), synthesisWindowLength()),
windowScaleFactor(0.0), windowScaleFactor(0.0),
guided(guidedParameters, log) guided(guidedParameters, log)
{ {
@@ -277,10 +280,10 @@ protected:
} }
} }
WindowType analysisWindowShape(int fftSize); WindowType analysisWindowShape();
int analysisWindowLength(int fftSize); int analysisWindowLength();
WindowType synthesisWindowShape(int fftSize); WindowType synthesisWindowShape();
int synthesisWindowLength(int fftSize); int synthesisWindowLength();
}; };
Parameters m_parameters; Parameters m_parameters;
@@ -367,6 +370,11 @@ protected:
return m_parameters.options & return m_parameters.options &
RubberBandStretcher::OptionProcessRealTime; RubberBandStretcher::OptionProcessRealTime;
} }
// True when the OptionWindowShort bit is set in m_parameters.options.
bool isShortWindowed() const {
    // The masked value is converted to bool here exactly as it would be
    // by returning the expression directly.
    const bool shortWindowRequested =
        m_parameters.options & RubberBandStretcher::OptionWindowShort;
    return shortWindowRequested;
}
}; };
} }