Experimentally (re-)introduce short window mode
This commit is contained in:
@@ -105,7 +105,10 @@ public:
|
|||||||
|
|
||||||
// Construction parameters for Guide; an instance is taken by value by the
// Guide constructor.
// NOTE(review): this span looks like a two-column diff rendering (old line,
// then new line, per row); the notes describe the new-side code - confirm.
//   sampleRate      - stored unchanged from _sampleRate.
//   shortWindowMode - stored unchanged from _shortWindow (new side only).
struct Parameters {
|
struct Parameters {
|
||||||
double sampleRate;
|
double sampleRate;
|
||||||
Parameters(double _sampleRate) : sampleRate(_sampleRate) { }
|
bool shortWindowMode;
|
||||||
|
// New-side constructor: both arguments are required (no defaults).
Parameters(double _sampleRate, bool _shortWindow) :
|
||||||
|
sampleRate(_sampleRate),
|
||||||
|
shortWindowMode(_shortWindow) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
Guide(Parameters parameters, Log log) :
|
Guide(Parameters parameters, Log log) :
|
||||||
@@ -120,7 +123,14 @@ public:
|
|||||||
{
|
{
|
||||||
double rate = m_parameters.sampleRate;
|
double rate = m_parameters.sampleRate;
|
||||||
|
|
||||||
m_log.log(1, "Guide: rate", rate);
|
m_log.log(1, "Guide: rate and short-window mode",
|
||||||
|
rate, m_parameters.shortWindowMode);
|
||||||
|
|
||||||
|
if (m_parameters.shortWindowMode) {
|
||||||
|
m_minLower = 0.0;
|
||||||
|
m_defaultLower = 0.0;
|
||||||
|
m_maxLower = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
int bandFftSize = roundUp(int(ceil(rate/16.0)));
|
int bandFftSize = roundUp(int(ceil(rate/16.0)));
|
||||||
m_configuration.fftBandLimits[0] =
|
m_configuration.fftBandLimits[0] =
|
||||||
@@ -302,6 +312,11 @@ public:
|
|||||||
guidance.phaseLockBands[3].f0 = higher;
|
guidance.phaseLockBands[3].f0 = higher;
|
||||||
guidance.phaseLockBands[3].f1 = nyquist;
|
guidance.phaseLockBands[3].f1 = nyquist;
|
||||||
|
|
||||||
|
if (m_parameters.shortWindowMode) {
|
||||||
|
guidance.phaseLockBands[1].p = 1;
|
||||||
|
guidance.phaseLockBands[2].p = 2;
|
||||||
|
}
|
||||||
|
|
||||||
if (outhop > 256) {
|
if (outhop > 256) {
|
||||||
guidance.phaseLockBands[3].p = 3;
|
guidance.phaseLockBands[3].p = 3;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,8 +43,11 @@ public:
|
|||||||
int fftSize;
|
int fftSize;
|
||||||
double sampleRate;
|
double sampleRate;
|
||||||
int channels;
|
int channels;
|
||||||
// Constructor for the parameter bundle taken by GuidedPhaseAdvance: each
// argument is copied into the member of the same name without the leading
// underscore.
// NOTE(review): this span looks like a two-column diff rendering (old line,
// then new line, per row). The new side adds the shortWindowMode member and
// a fourth, non-defaulted _shortWindow argument that initialises it - confirm.
Parameters(int _fftSize, double _sampleRate, int _channels) :
|
bool shortWindowMode;
|
||||||
fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels) { }
|
Parameters(int _fftSize, double _sampleRate, int _channels,
|
||||||
|
bool _shortWindow) :
|
||||||
|
fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels),
|
||||||
|
shortWindowMode(_shortWindow) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
GuidedPhaseAdvance(Parameters parameters, Log log) :
|
GuidedPhaseAdvance(Parameters parameters, Log log) :
|
||||||
|
|||||||
@@ -39,7 +39,10 @@ R3Stretcher::R3Stretcher(Parameters parameters,
|
|||||||
m_timeRatio(initialTimeRatio),
|
m_timeRatio(initialTimeRatio),
|
||||||
m_pitchScale(initialPitchScale),
|
m_pitchScale(initialPitchScale),
|
||||||
m_formantScale(0.0),
|
m_formantScale(0.0),
|
||||||
m_guide(Guide::Parameters(m_parameters.sampleRate), m_log),
|
m_guide(Guide::Parameters
|
||||||
|
(m_parameters.sampleRate,
|
||||||
|
m_parameters.options & RubberBandStretcher::OptionWindowShort),
|
||||||
|
m_log),
|
||||||
m_guideConfiguration(m_guide.getConfiguration()),
|
m_guideConfiguration(m_guide.getConfiguration()),
|
||||||
m_channelAssembly(m_parameters.channels),
|
m_channelAssembly(m_parameters.channels),
|
||||||
m_inhop(1),
|
m_inhop(1),
|
||||||
@@ -62,6 +65,16 @@ R3Stretcher::R3Stretcher(Parameters parameters,
|
|||||||
m_log.log(1, "R3Stretcher::R3Stretcher: initial time ratio and pitch scale",
|
m_log.log(1, "R3Stretcher::R3Stretcher: initial time ratio and pitch scale",
|
||||||
m_timeRatio, m_pitchScale);
|
m_timeRatio, m_pitchScale);
|
||||||
|
|
||||||
|
if (isRealTime()) {
|
||||||
|
m_log.log(1, "R3Stretcher::R3Stretcher: real-time mode");
|
||||||
|
} else {
|
||||||
|
m_log.log(1, "R3Stretcher::R3Stretcher: offline mode");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isShortWindowed()) {
|
||||||
|
m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested");
|
||||||
|
}
|
||||||
|
|
||||||
double maxClassifierFrequency = 16000.0;
|
double maxClassifierFrequency = 16000.0;
|
||||||
if (maxClassifierFrequency > m_parameters.sampleRate/2) {
|
if (maxClassifierFrequency > m_parameters.sampleRate/2) {
|
||||||
maxClassifierFrequency = m_parameters.sampleRate/2;
|
maxClassifierFrequency = m_parameters.sampleRate/2;
|
||||||
@@ -98,7 +111,8 @@ R3Stretcher::R3Stretcher(Parameters parameters,
|
|||||||
for (auto band: m_guideConfiguration.fftBandLimits) {
|
for (auto band: m_guideConfiguration.fftBandLimits) {
|
||||||
int fftSize = band.fftSize;
|
int fftSize = band.fftSize;
|
||||||
GuidedPhaseAdvance::Parameters guidedParameters
|
GuidedPhaseAdvance::Parameters guidedParameters
|
||||||
(fftSize, m_parameters.sampleRate, m_parameters.channels);
|
(fftSize, m_parameters.sampleRate, m_parameters.channels,
|
||||||
|
isShortWindowed());
|
||||||
m_scaleData[fftSize] = std::make_shared<ScaleData>
|
m_scaleData[fftSize] = std::make_shared<ScaleData>
|
||||||
(guidedParameters, m_log);
|
(guidedParameters, m_log);
|
||||||
}
|
}
|
||||||
@@ -130,30 +144,44 @@ R3Stretcher::R3Stretcher(Parameters parameters,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Chooses the analysis window shape for this ScaleData.
// NOTE(review): this span looks like a two-column diff rendering (old line,
// then new line, per row); the notes describe the new-side code - confirm.
// New side: takes no argument and reads the fftSize and shortWindowMode
// members instead. Returns HannWindow for large FFT sizes and
// NiemitaloForwardWindow otherwise; the two modes differ only in whether a
// size of exactly 2048 counts as "large".
WindowType
|
WindowType
|
||||||
R3Stretcher::ScaleData::analysisWindowShape(int fftSize)
|
R3Stretcher::ScaleData::analysisWindowShape()
|
||||||
{
|
{
|
||||||
if (fftSize > 2048) return HannWindow;
|
if (shortWindowMode) {
|
||||||
else return NiemitaloForwardWindow;
|
// Short-window mode: the boundary is inclusive, so 2048 itself gets Hann.
if (fftSize >= 2048) return HannWindow;
|
||||||
|
else return NiemitaloForwardWindow;
|
||||||
|
} else {
|
||||||
|
// Normal mode: same rule as the old side, only sizes above 2048 get Hann.
if (fftSize > 2048) return HannWindow;
|
||||||
|
else return NiemitaloForwardWindow;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the analysis window length for this ScaleData, which is simply
// the FFT size in both the old and the new version.
// NOTE(review): this span looks like a two-column diff rendering (old line,
// then new line, per row). The new side drops the fftSize parameter and
// returns the fftSize member instead - confirm.
int
|
int
|
||||||
R3Stretcher::ScaleData::analysisWindowLength(int fftSize)
|
R3Stretcher::ScaleData::analysisWindowLength()
|
||||||
{
|
{
|
||||||
return fftSize;
|
return fftSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Chooses the synthesis window shape for this ScaleData.
// NOTE(review): this span looks like a two-column diff rendering (old line,
// then new line, per row); the notes describe the new-side code - confirm.
// New side: takes no argument and reads the fftSize and shortWindowMode
// members instead. Returns HannWindow for large FFT sizes and
// NiemitaloReverseWindow otherwise; the two modes differ only in whether a
// size of exactly 2048 counts as "large".
WindowType
|
WindowType
|
||||||
R3Stretcher::ScaleData::synthesisWindowShape(int fftSize)
|
R3Stretcher::ScaleData::synthesisWindowShape()
|
||||||
{
|
{
|
||||||
if (fftSize > 2048) return HannWindow;
|
if (shortWindowMode) {
|
||||||
else return NiemitaloReverseWindow;
|
// Short-window mode: the boundary is inclusive, so 2048 itself gets Hann.
if (fftSize >= 2048) return HannWindow;
|
||||||
|
else return NiemitaloReverseWindow;
|
||||||
|
} else {
|
||||||
|
// Normal mode: same rule as the old side, only sizes above 2048 get Hann.
if (fftSize > 2048) return HannWindow;
|
||||||
|
else return NiemitaloReverseWindow;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the synthesis window length for this ScaleData.
// NOTE(review): this span looks like a two-column diff rendering (old line,
// then new line, per row); the notes describe the new-side code - confirm.
// New side: takes no argument and reads the fftSize and shortWindowMode
// members instead. In short-window mode the result is always fftSize;
// otherwise it is fftSize/2 (integer division) for sizes above 2048 and
// fftSize for the rest, as on the old side.
int
|
int
|
||||||
R3Stretcher::ScaleData::synthesisWindowLength(int fftSize)
|
R3Stretcher::ScaleData::synthesisWindowLength()
|
||||||
{
|
{
|
||||||
if (fftSize > 2048) return fftSize/2;
|
if (shortWindowMode) {
|
||||||
else return fftSize;
|
// Short-window mode never halves the window, whatever the FFT size.
return fftSize;
|
||||||
|
} else {
|
||||||
|
if (fftSize > 2048) return fftSize/2;
|
||||||
|
else return fftSize;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -292,6 +320,14 @@ R3Stretcher::calculateHop()
|
|||||||
if (proposedOuthop > 512.0) proposedOuthop = 512.0;
|
if (proposedOuthop > 512.0) proposedOuthop = 512.0;
|
||||||
if (proposedOuthop < 128.0) proposedOuthop = 128.0;
|
if (proposedOuthop < 128.0) proposedOuthop = 128.0;
|
||||||
|
|
||||||
|
if (isShortWindowed()) {
|
||||||
|
// perhaps ironically, the short window mode actually uses a
|
||||||
|
// longer synthesis window for the 2048-bin FFT and, since
|
||||||
|
// reduced CPU consumption is the motivation, it can generally
|
||||||
|
// survive longer hops
|
||||||
|
proposedOuthop *= 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop);
|
m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop);
|
||||||
|
|
||||||
double inhop = proposedOuthop / ratio;
|
double inhop = proposedOuthop / ratio;
|
||||||
@@ -728,6 +764,10 @@ R3Stretcher::consume()
|
|||||||
|
|
||||||
for (auto &it : m_channelData[0]->scales) {
|
for (auto &it : m_channelData[0]->scales) {
|
||||||
int fftSize = it.first;
|
int fftSize = it.first;
|
||||||
|
if (isShortWindowed() &&
|
||||||
|
fftSize == m_guideConfiguration.longestFftSize) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
for (int c = 0; c < channels; ++c) {
|
for (int c = 0; c < channels; ++c) {
|
||||||
auto &cd = m_channelData.at(c);
|
auto &cd = m_channelData.at(c);
|
||||||
auto &scale = cd->scales.at(fftSize);
|
auto &scale = cd->scales.at(fftSize);
|
||||||
@@ -898,7 +938,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Finally window the longest scale
|
// Finally window the longest scale
|
||||||
m_scaleData.at(longest)->analysisWindow.cut(buf);
|
if (!isShortWindowed()) {
|
||||||
|
m_scaleData.at(longest)->analysisWindow.cut(buf);
|
||||||
|
}
|
||||||
|
|
||||||
// FFT shift, forward FFT, and carry out cartesian-polar
|
// FFT shift, forward FFT, and carry out cartesian-polar
|
||||||
// conversion for each FFT size.
|
// conversion for each FFT size.
|
||||||
@@ -956,6 +998,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
if (fftSize == classify && haveValidReadahead) {
|
if (fftSize == classify && haveValidReadahead) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
if (isShortWindowed() && fftSize == longest) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
auto &scale = it.second;
|
auto &scale = it.second;
|
||||||
|
|
||||||
@@ -1127,6 +1172,11 @@ R3Stretcher::adjustFormant(int c)
|
|||||||
for (auto &it : cd->scales) {
|
for (auto &it : cd->scales) {
|
||||||
|
|
||||||
int fftSize = it.first;
|
int fftSize = it.first;
|
||||||
|
if (isShortWindowed() &&
|
||||||
|
fftSize == m_guideConfiguration.longestFftSize) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
auto &scale = it.second;
|
auto &scale = it.second;
|
||||||
|
|
||||||
int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate));
|
int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate));
|
||||||
@@ -1156,6 +1206,8 @@ R3Stretcher::adjustFormant(int c)
|
|||||||
void
|
void
|
||||||
R3Stretcher::adjustPreKick(int c)
|
R3Stretcher::adjustPreKick(int c)
|
||||||
{
|
{
|
||||||
|
if (isShortWindowed()) return;
|
||||||
|
|
||||||
Profiler profiler("R3Stretcher::adjustPreKick");
|
Profiler profiler("R3Stretcher::adjustPreKick");
|
||||||
|
|
||||||
auto &cd = m_channelData.at(c);
|
auto &cd = m_channelData.at(c);
|
||||||
@@ -1197,6 +1249,11 @@ R3Stretcher::synthesiseChannel(int c, int outhop, bool draining)
|
|||||||
|
|
||||||
for (const auto &band : cd->guidance.fftBands) {
|
for (const auto &band : cd->guidance.fftBands) {
|
||||||
int fftSize = band.fftSize;
|
int fftSize = band.fftSize;
|
||||||
|
|
||||||
|
if (isShortWindowed() && fftSize == longest) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
auto &scale = cd->scales.at(fftSize);
|
auto &scale = cd->scales.at(fftSize);
|
||||||
auto &scaleData = m_scaleData.at(fftSize);
|
auto &scaleData = m_scaleData.at(fftSize);
|
||||||
|
|
||||||
|
|||||||
@@ -253,19 +253,22 @@ protected:
|
|||||||
|
|
||||||
struct ScaleData {
|
struct ScaleData {
|
||||||
int fftSize;
|
int fftSize;
|
||||||
|
bool shortWindowMode;
|
||||||
FFT fft;
|
FFT fft;
|
||||||
Window<process_t> analysisWindow;
|
Window<process_t> analysisWindow;
|
||||||
Window<process_t> synthesisWindow;
|
Window<process_t> synthesisWindow;
|
||||||
process_t windowScaleFactor;
|
process_t windowScaleFactor;
|
||||||
GuidedPhaseAdvance guided;
|
GuidedPhaseAdvance guided;
|
||||||
|
|
||||||
ScaleData(GuidedPhaseAdvance::Parameters guidedParameters,
|
ScaleData(GuidedPhaseAdvance::Parameters guidedParameters,
|
||||||
Log log) :
|
Log log) :
|
||||||
fftSize(guidedParameters.fftSize),
|
fftSize(guidedParameters.fftSize),
|
||||||
|
shortWindowMode(guidedParameters.shortWindowMode),
|
||||||
fft(fftSize),
|
fft(fftSize),
|
||||||
analysisWindow(analysisWindowShape(fftSize),
|
analysisWindow(analysisWindowShape(),
|
||||||
analysisWindowLength(fftSize)),
|
analysisWindowLength()),
|
||||||
synthesisWindow(synthesisWindowShape(fftSize),
|
synthesisWindow(synthesisWindowShape(),
|
||||||
synthesisWindowLength(fftSize)),
|
synthesisWindowLength()),
|
||||||
windowScaleFactor(0.0),
|
windowScaleFactor(0.0),
|
||||||
guided(guidedParameters, log)
|
guided(guidedParameters, log)
|
||||||
{
|
{
|
||||||
@@ -277,10 +280,10 @@ protected:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WindowType analysisWindowShape(int fftSize);
|
WindowType analysisWindowShape();
|
||||||
int analysisWindowLength(int fftSize);
|
int analysisWindowLength();
|
||||||
WindowType synthesisWindowShape(int fftSize);
|
WindowType synthesisWindowShape();
|
||||||
int synthesisWindowLength(int fftSize);
|
int synthesisWindowLength();
|
||||||
};
|
};
|
||||||
|
|
||||||
Parameters m_parameters;
|
Parameters m_parameters;
|
||||||
@@ -367,6 +370,11 @@ protected:
|
|||||||
return m_parameters.options &
|
return m_parameters.options &
|
||||||
RubberBandStretcher::OptionProcessRealTime;
|
RubberBandStretcher::OptionProcessRealTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether short-window mode was requested: the result is the
// bitwise AND of m_parameters.options with
// RubberBandStretcher::OptionWindowShort, converted to bool (true when the
// AND is non-zero). Does not modify the object.
// NOTE(review): this span looks like the added (new-side) rows of a
// two-column diff rendering, hence the separator lines - confirm.
bool isShortWindowed() const {
|
||||||
|
return m_parameters.options &
|
||||||
|
RubberBandStretcher::OptionWindowShort;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user