Work towards properly expressing the window sizes in Guide when in single-window mode, so that the time-domain frame length is separated from the longest FFT size within R3Stretcher, allowing us to use a shorter time-domain frame. This does not yet work correctly in single-window mode.

This commit is contained in:
Chris Cannam
2022-08-03 14:16:17 +01:00
parent 9fff2836c6
commit 2fa0e1162e
3 changed files with 264 additions and 195 deletions

View File

@@ -74,6 +74,14 @@ R3Stretcher::R3Stretcher(Parameters parameters,
if (isSingleWindowed()) {
m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested");
}
if (m_guideConfiguration.longestFftSize >
m_guideConfiguration.classificationFftSize) {
m_timeDomainFrameLength = m_guideConfiguration.longestFftSize;
} else {
m_timeDomainFrameLength =
(m_guideConfiguration.classificationFftSize * 3) / 2;
}
double maxClassifierFrequency = 16000.0;
if (maxClassifierFrequency > m_parameters.sampleRate/2) {
@@ -90,25 +98,27 @@ R3Stretcher::R3Stretcher(Parameters parameters,
BinClassifier::Parameters classifierParameters
(classificationBins, 9, 1, 10, 2.0, 2.0);
int inRingBufferSize = m_guideConfiguration.longestFftSize * 2;
int outRingBufferSize = m_guideConfiguration.longestFftSize * 16;
int inRingBufferSize = m_timeDomainFrameLength * 2;
int outRingBufferSize = m_timeDomainFrameLength * 16;
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData.push_back(std::make_shared<ChannelData>
(segmenterParameters,
classifierParameters,
m_guideConfiguration.longestFftSize,
m_timeDomainFrameLength,
inRingBufferSize,
outRingBufferSize));
for (auto band: m_guideConfiguration.fftBandLimits) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
int fftSize = band.fftSize;
m_channelData[c]->scales[fftSize] =
std::make_shared<ChannelScaleData>
(fftSize, m_guideConfiguration.longestFftSize);
(fftSize, m_timeDomainFrameLength);
}
}
for (auto band: m_guideConfiguration.fftBandLimits) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
int fftSize = band.fftSize;
GuidedPhaseAdvance::Parameters guidedParameters
(fftSize, m_parameters.sampleRate, m_parameters.channels,
@@ -273,7 +283,7 @@ R3Stretcher::createResampler()
}
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize;
resamplerParameters.maxBufferSize = m_timeDomainFrameLength;
if (isRealTime()) {
// If we knew the caller would never change ratio, we could
@@ -447,7 +457,7 @@ R3Stretcher::getPreferredStartPad() const
if (!isRealTime()) {
return 0;
} else {
return m_guideConfiguration.longestFftSize / 2;
return m_timeDomainFrameLength / 2;
}
}
@@ -458,7 +468,7 @@ R3Stretcher::getStartDelay() const
return 0;
} else {
double factor = 0.5 / m_pitchScale;
return size_t(ceil(m_guideConfiguration.longestFftSize * factor));
return size_t(ceil(m_timeDomainFrameLength * factor));
}
}
@@ -531,10 +541,9 @@ size_t
R3Stretcher::getSamplesRequired() const
{
if (available() != 0) return 0;
int longest = m_guideConfiguration.longestFftSize;
int rs = m_channelData[0]->inbuf->getReadSpace();
if (rs < longest) {
return longest - rs;
if (rs < m_timeDomainFrameLength) {
return m_timeDomainFrameLength - rs;
} else {
return 0;
}
@@ -544,7 +553,7 @@ void
R3Stretcher::setMaxProcessSize(size_t n)
{
size_t oldSize = m_channelData[0]->inbuf->getSize();
size_t newSize = m_guideConfiguration.longestFftSize + n;
size_t newSize = m_timeDomainFrameLength + n;
if (newSize > oldSize) {
m_log.log(1, "setMaxProcessSize: resizing from and to", oldSize, newSize);
@@ -599,11 +608,11 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final)
createResampler();
}
// Pad to half the longest frame. As with R2, in real-time
// mode we don't do this -- it's better to start with a
// swoosh than introduce more latency, and we don't want
// gaps when the ratio changes.
int pad = m_guideConfiguration.longestFftSize / 2;
// Pad to half the frame. As with R2, in real-time mode we
// don't do this -- it's better to start with a swoosh
// than introduce more latency, and we don't want gaps
// when the ratio changes.
int pad = m_timeDomainFrameLength / 2;
m_log.log(1, "offline mode: prefilling with", pad);
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->zero(pad);
@@ -735,7 +744,7 @@ R3Stretcher::consume()
// the map iterators
int readSpace = cd0->inbuf->getReadSpace();
if (readSpace < longest) {
if (readSpace < m_timeDomainFrameLength) {
if (m_mode == ProcessMode::Finished) {
if (readSpace == 0) {
int fill = cd0->scales.at(longest)->accumulatorFill;
@@ -761,10 +770,6 @@ R3Stretcher::consume()
for (auto &it : m_channelData[0]->scales) {
int fftSize = it.first;
if (isSingleWindowed() &&
fftSize != m_guideConfiguration.classificationFftSize) {
continue;
}
for (int c = 0; c < channels; ++c) {
auto &cd = m_channelData.at(c);
auto &scale = cd->scales.at(fftSize);
@@ -887,6 +892,8 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
auto &cd = m_channelData.at(c);
process_t *buf = cd->scales.at(longest)->timeDomain.data();
//!!! review
int readSpace = cd->inbuf->getReadSpace();
if (readSpace < longest) {
cd->inbuf->peek(buf, readSpace);
@@ -900,16 +907,15 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
// it, windowing as we copy. The classification scale is handled
// separately because it has readahead, so skip it here as well as
// the longest. (In practice this means we are probably only
// populating one scale)
// populating one scale in multi-window mode, and none at all in
// single-window mode)
if (!isSingleWindowed()) {
for (auto &it: cd->scales) {
int fftSize = it.first;
if (fftSize == classify || fftSize == longest) continue;
int offset = (longest - fftSize) / 2;
m_scaleData.at(fftSize)->analysisWindow.cut
(buf + offset, it.second->timeDomain.data());
}
for (auto &it: cd->scales) {
int fftSize = it.first;
if (fftSize == classify || fftSize == longest) continue;
int offset = (longest - fftSize) / 2;
m_scaleData.at(fftSize)->analysisWindow.cut
(buf + offset, it.second->timeDomain.data());
}
// The classification scale has a one-hop readahead, so populate
@@ -937,7 +943,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
}
// Finally window the longest scale
if (!isSingleWindowed()) {
if (classify != longest) {
m_scaleData.at(longest)->analysisWindow.cut(buf);
}
@@ -964,14 +970,14 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
classifyScale->real.data(),
classifyScale->imag.data());
for (const auto &b : m_guideConfiguration.fftBandLimits) {
if (b.fftSize == classify) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
if (band.fftSize == classify) {
ToPolarSpec spec;
spec.magFromBin = 0;
spec.magBinCount = classify/2 + 1;
spec.polarFromBin = b.b0min;
spec.polarBinCount = b.b1max - b.b0min + 1;
spec.polarFromBin = band.b0min;
spec.polarBinCount = band.b1max - band.b0min + 1;
convertToPolar(readahead.mag.data(),
readahead.phase.data(),
classifyScale->real.data(),
@@ -997,9 +1003,6 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
if (fftSize == classify && haveValidReadahead) {
continue;
}
if (isSingleWindowed() && fftSize != classify) {
continue;
}
auto &scale = it.second;
@@ -1009,8 +1012,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
scale->real.data(),
scale->imag.data());
for (const auto &b : m_guideConfiguration.fftBandLimits) {
if (b.fftSize == fftSize) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
if (band.fftSize == fftSize) {
ToPolarSpec spec;
@@ -1024,11 +1028,11 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
if (fftSize == classify) {
spec.magFromBin = 0;
spec.magBinCount = classify/2 + 1;
spec.polarFromBin = b.b0min;
spec.polarBinCount = b.b1max - b.b0min + 1;
spec.polarFromBin = band.b0min;
spec.polarBinCount = band.b1max - band.b0min + 1;
} else {
spec.magFromBin = b.b0min;
spec.magBinCount = b.b1max - b.b0min + 1;
spec.magFromBin = band.b0min;
spec.magBinCount = band.b1max - band.b0min + 1;
spec.polarFromBin = spec.magFromBin;
spec.polarBinCount = spec.magBinCount;
}
@@ -1171,11 +1175,6 @@ R3Stretcher::adjustFormant(int c)
for (auto &it : cd->scales) {
int fftSize = it.first;
if (isSingleWindowed() &&
fftSize != m_guideConfiguration.classificationFftSize) {
continue;
}
auto &scale = it.second;
int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate));
@@ -1186,9 +1185,10 @@ R3Stretcher::adjustFormant(int c)
process_t maxRatio = 60.0;
process_t minRatio = 1.0 / maxRatio;
for (const auto &b : m_guideConfiguration.fftBandLimits) {
if (b.fftSize != fftSize) continue;
for (int i = b.b0min; i < b.b1max && i < highBin; ++i) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
if (band.fftSize != fftSize) continue;
for (int i = band.b0min; i < band.b1max && i < highBin; ++i) {
process_t source = cd->formant->envelopeAt(i * sourceFactor);
process_t target = cd->formant->envelopeAt(i * targetFactor);
if (target > 0.0) {
@@ -1205,6 +1205,8 @@ R3Stretcher::adjustFormant(int c)
void
R3Stretcher::adjustPreKick(int c)
{
//!!! if we aren't going to do this, we should modify Guide so as
//!!! not to do the small additional work of checking for it
if (isSingleWindowed()) return;
Profiler profiler("R3Stretcher::adjustPreKick");
@@ -1245,14 +1247,11 @@ R3Stretcher::synthesiseChannel(int c, int outhop, bool draining)
int longest = m_guideConfiguration.longestFftSize;
auto &cd = m_channelData.at(c);
for (const auto &band : cd->guidance.fftBands) {
int fftSize = band.fftSize;
if (isSingleWindowed() &&
fftSize != m_guideConfiguration.classificationFftSize) {
continue;
}
for (int b = 0; b < cd->guidance.fftBandCount; ++b) {
const auto &band = cd->guidance.fftBands[b];
int fftSize = band.fftSize;
auto &scale = cd->scales.at(fftSize);
auto &scaleData = m_scaleData.at(fftSize);