From f5b381e086e025467f75888642369efb8378ce64 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Wed, 25 May 2022 11:26:16 +0100 Subject: [PATCH] Pull out per-channel analysis and resynthesis functions --- src/finer/R3StretcherImpl.cpp | 542 +++++++++++++++++----------------- src/finer/R3StretcherImpl.h | 2 + 2 files changed, 279 insertions(+), 265 deletions(-) diff --git a/src/finer/R3StretcherImpl.cpp b/src/finer/R3StretcherImpl.cpp index 47a6fa8..bae8894 100644 --- a/src/finer/R3StretcherImpl.cpp +++ b/src/finer/R3StretcherImpl.cpp @@ -179,9 +179,7 @@ void R3StretcherImpl::consume() { double ratio = getEffectiveRatio(); - int longest = m_guideConfiguration.longestFftSize; - int classify = m_guideConfiguration.classificationFftSize; m_calculator->setDebugLevel(3); @@ -194,11 +192,14 @@ R3StretcherImpl::consume() std::cout << "outhop = " << outhop << std::endl; - double instantaneousRatio = double(m_prevOuthop) / double(m_inhop); - m_prevOuthop = outhop; - while (m_channelData.at(0)->outbuf->getWriteSpace() >= outhop) { + // NB our ChannelData, ScaleData, and ChannelScaleData maps + // contain shared_ptrs; whenever we retain one of them in a + // variable, we do so by reference to avoid copying the + // shared_ptr (as that is not realtime safe). Same goes for + // the map iterators + int readSpace = m_channelData.at(0)->inbuf->getReadSpace(); if (readSpace < longest) { if (m_draining) { @@ -210,158 +211,10 @@ R3StretcherImpl::consume() } } - // Analysis. This is per channel + // Analysis for (int c = 0; c < m_parameters.channels; ++c) { - - // Our ChannelData, ScaleData, and ChannelScaleData maps - // contain shared_ptrs; whenever we retain one of them in - // a variable here, we do so by reference to avoid copying - // the shared_ptr (as that is not realtime safe). Same - // goes for the map iterators - - auto &cd = m_channelData.at(c); - auto &longestScale = cd->scales.at(longest); - double *buf = longestScale->timeDomain.data(); - - if (readSpace < longest) { - cd->inbuf->peek(buf, readSpace); - v_zero(buf + readSpace, longest - readSpace); - } else { - cd->inbuf->peek(buf, longest); - } - - // We have a single unwindowed frame at the longest FFT - // size ("scale"). Populate the shorter FFT sizes from the - // centre of it, windowing as we copy. The classification - // scale is handled separately because it has readahead, - // so skip it here as well as the longest. (In practice - // this means we are probably only populating one scale) - - for (auto &it: cd->scales) { - int fftSize = it.first; - if (fftSize == classify || fftSize == longest) continue; - int offset = (longest - fftSize) / 2; - m_scaleData.at(fftSize)->analysisWindow.cut - (buf + offset, it.second->timeDomain.data()); - } - - // The classification scale has a one-hop readahead (note - // that inhop is fixed), so populate its current data from - // the readahead and the readahead from further down the - // long unwindowed frame. - - auto &classifyScale = cd->scales.at(classify); - ClassificationReadaheadData &readahead = cd->readahead; - - m_scaleData.at(classify)->analysisWindow.cut - (buf + (longest - classify) / 2 + m_inhop, - readahead.timeDomain.data()); - - // Finally window the longest scale - m_scaleData.at(longest)->analysisWindow.cut(buf); - - // FFT shift, forward FFT, and carry out cartesian-polar - // conversion for each FFT size. - - // For the classification scale we need magnitudes for the - // full range (polar only in a subset) and we operate in - // the readahead, pulling current values from the existing - // readahead - - v_fftshift(readahead.timeDomain.data(), classify); - - v_copy(classifyScale->mag.data(), - readahead.mag.data(), - classifyScale->bufSize); - - v_copy(classifyScale->phase.data(), - readahead.phase.data(), - classifyScale->bufSize); - - m_scaleData.at(classify)->fft.forward(readahead.timeDomain.data(), - classifyScale->real.data(), - classifyScale->imag.data()); - - for (const auto &b : m_guideConfiguration.fftBandLimits) { - if (b.fftSize == classify) { - if (b.b0min > 0) { - v_cartesian_to_magnitudes(readahead.mag.data(), - classifyScale->real.data(), - classifyScale->imag.data(), - b.b0min); - } - - v_cartesian_to_polar(readahead.mag.data() + b.b0min, - readahead.phase.data() + b.b0min, - classifyScale->real.data() + b.b0min, - classifyScale->imag.data() + b.b0min, - b.b1max - b.b0min); - - if (b.b1max < classify/2 + 1) { - v_cartesian_to_magnitudes - (readahead.mag.data() + b.b1max, - classifyScale->real.data() + b.b1max, - classifyScale->imag.data() + b.b1max, - classify/2 + 1 - b.b1max); - } - - v_scale(classifyScale->mag.data(), - 1.0 / double(classify), - classifyScale->mag.size()); - break; - } - } - - // For the others we operate directly in the scale data - // and restrict the range for cartesian-polar conversion - - for (auto &it: cd->scales) { - int fftSize = it.first; - if (fftSize == classify) continue; - auto &scale = it.second; - - v_fftshift(scale->timeDomain.data(), fftSize); - - m_scaleData.at(fftSize)->fft.forward(scale->timeDomain.data(), - scale->real.data(), - scale->imag.data()); - - //!!! This should be a map - for (const auto &b : m_guideConfiguration.fftBandLimits) { - if (b.fftSize == fftSize) { - v_cartesian_to_polar(scale->mag.data() + b.b0min, - scale->phase.data() + b.b0min, - scale->real.data() + b.b0min, - scale->imag.data() + b.b0min, - b.b1max - b.b0min); - v_scale(scale->mag.data() + b.b0min, - 1.0 / double(fftSize), - b.b1max - b.b0min); - break; - } - } - } - - // Use the classification scale to get a bin segmentation - // and calculate the adaptive frequency guide for this - // channel - cd->prevSegmentation = cd->segmentation; - cd->segmentation = cd->nextSegmentation; - cd->nextSegmentation = cd->segmenter->segment(readahead.mag.data()); - - m_troughPicker.findNearestAndNextPeaks - (classifyScale->mag.data(), 3, nullptr, - classifyScale->troughs.data()); - - m_guide.calculate(instantaneousRatio, - classifyScale->mag.data(), - classifyScale->troughs.data(), - classifyScale->prevMag.data(), - cd->segmentation, - cd->prevSegmentation, - cd->nextSegmentation, - cd->guidance); + analyseChannel(c, m_prevOuthop); } // Phase update. This is synchronised across all channels @@ -370,11 +223,11 @@ R3StretcherImpl::consume() int fftSize = it.first; for (int c = 0; c < m_parameters.channels; ++c) { auto &cd = m_channelData.at(c); - auto &classifyScale = cd->scales.at(fftSize); - m_channelAssembly.mag[c] = classifyScale->mag.data(); - m_channelAssembly.phase[c] = classifyScale->phase.data(); + auto &scale = cd->scales.at(fftSize); + m_channelAssembly.mag[c] = scale->mag.data(); + m_channelAssembly.phase[c] = scale->phase.data(); m_channelAssembly.guidance[c] = &cd->guidance; - m_channelAssembly.outPhase[c] = classifyScale->advancedPhase.data(); + m_channelAssembly.outPhase[c] = scale->advancedPhase.data(); } m_scaleData.at(fftSize)->guided.advance (m_channelAssembly.outPhase.data(), @@ -386,134 +239,293 @@ R3StretcherImpl::consume() outhop); } - // Resynthesis. This is per channel + // Resynthesis for (int c = 0; c < m_parameters.channels; ++c) { + synthesiseChannel(c, outhop); + } + } - auto &cd = m_channelData.at(c); + m_prevOuthop = outhop; +} - for (auto &it : cd->scales) { - auto &scale = it.second; - int bufSize = scale->bufSize; - // copy to prevMag before filtering - v_copy(scale->prevMag.data(), - scale->mag.data(), - bufSize); - } - - for (const auto &band : cd->guidance.fftBands) { - int fftSize = band.fftSize; - auto &scale = cd->scales.at(fftSize); - auto &scaleData = m_scaleData.at(fftSize); +void +R3StretcherImpl::analyseChannel(int c, int prevOuthop) +{ + int longest = m_guideConfiguration.longestFftSize; + int classify = m_guideConfiguration.classificationFftSize; - //!!! messy and slow, but leave it until we've - //!!! discovered whether we need a window accumulator - //!!! (we probably do) - int analysisWindowSize = scaleData->analysisWindow.getSize(); - int synthesisWindowSize = scaleData->synthesisWindow.getSize(); - int offset = (analysisWindowSize - synthesisWindowSize) / 2; - double winscale = 0.0; - for (int i = 0; i < synthesisWindowSize; ++i) { - winscale += scaleData->analysisWindow.getValue(i + offset) * - scaleData->synthesisWindow.getValue(i); - } - winscale = double(outhop) / winscale; + auto &cd = m_channelData.at(c); + double *buf = cd->scales.at(longest)->timeDomain.data(); - // The frequency filter is applied naively in the - // frequency domain. Aliasing is reduced by the - // shorter resynthesis window - - double factor = m_parameters.sampleRate / double(fftSize); - for (int i = 0; i < fftSize/2 + 1; ++i) { - double f = double(i) * factor; - if (f >= band.f0 && f < band.f1) { - //!!! check the mod 2 bit from stretch-fn - scale->mag[i] *= winscale; - } else { - scale->mag[i] = 0.f; - } - } - } + int readSpace = cd->inbuf->getReadSpace(); + if (readSpace < longest) { + cd->inbuf->peek(buf, readSpace); + v_zero(buf + readSpace, longest - readSpace); + } else { + cd->inbuf->peek(buf, longest); + } + + // We have a single unwindowed frame at the longest FFT + // size ("scale"). Populate the shorter FFT sizes from the + // centre of it, windowing as we copy. The classification + // scale is handled separately because it has readahead, + // so skip it here as well as the longest. (In practice + // this means we are probably only populating one scale) - // Resynthesise each FFT size (scale) individually, then - // sum. This is easier to manage scaling for in situations - // with a varying resynthesis hop + for (auto &it: cd->scales) { + int fftSize = it.first; + if (fftSize == classify || fftSize == longest) continue; + int offset = (longest - fftSize) / 2; + m_scaleData.at(fftSize)->analysisWindow.cut + (buf + offset, it.second->timeDomain.data()); + } + + // The classification scale has a one-hop readahead (note + // that inhop is fixed), so populate its current data from + // the readahead and the readahead from further down the + // long unwindowed frame. + + auto &classifyScale = cd->scales.at(classify); + ClassificationReadaheadData &readahead = cd->readahead; + + m_scaleData.at(classify)->analysisWindow.cut + (buf + (longest - classify) / 2 + m_inhop, + readahead.timeDomain.data()); - for (auto &it : cd->scales) { - int fftSize = it.first; - auto &scale = it.second; - auto &scaleData = m_scaleData.at(fftSize); - - for (const auto &b : m_guideConfiguration.fftBandLimits) { - if (b.fftSize == fftSize) { - int offset = b.b0min; - v_zero(scale->real.data(), fftSize/2 + 1); - v_zero(scale->imag.data(), fftSize/2 + 1); - v_polar_to_cartesian - (scale->real.data() + offset, - scale->imag.data() + offset, - scale->mag.data() + offset, - scale->advancedPhase.data() + offset, - b.b1max - offset); - break; - } - } + // Finally window the longest scale + m_scaleData.at(longest)->analysisWindow.cut(buf); - scaleData->fft.inverse(scale->real.data(), - scale->imag.data(), - scale->timeDomain.data()); + // FFT shift, forward FFT, and carry out cartesian-polar + // conversion for each FFT size. - v_fftshift(scale->timeDomain.data(), fftSize); + // For the classification scale we need magnitudes for the + // full range (polar only in a subset) and we operate in + // the readahead, pulling current values from the existing + // readahead - // Synthesis window is shorter than analysis window, - // so copy and cut only from the middle of the - // time-domain frame; and the accumulator length - // always matches the longest FFT size, so as to make - // mixing straightforward, so there is an additional - // offset needed for the target - - int synthesisWindowSize = scaleData->synthesisWindow.getSize(); - int fromOffset = (fftSize - synthesisWindowSize) / 2; - int toOffset = (m_guideConfiguration.longestFftSize - - synthesisWindowSize) / 2; + v_fftshift(readahead.timeDomain.data(), classify); - scaleData->synthesisWindow.cutAndAdd - (scale->timeDomain.data() + fromOffset, - scale->accumulator.data() + toOffset); - } - - // Mix and emit this channel + v_copy(classifyScale->mag.data(), + readahead.mag.data(), + classifyScale->bufSize); - double *mixptr = cd->mixdown.data(); - v_zero(mixptr, outhop); + v_copy(classifyScale->phase.data(), + readahead.phase.data(), + classifyScale->bufSize); - for (auto &it : cd->scales) { - auto &scale = it.second; - v_add(mixptr, scale->accumulator.data(), outhop); + m_scaleData.at(classify)->fft.forward(readahead.timeDomain.data(), + classifyScale->real.data(), + classifyScale->imag.data()); + + for (const auto &b : m_guideConfiguration.fftBandLimits) { + if (b.fftSize == classify) { + if (b.b0min > 0) { + v_cartesian_to_magnitudes(readahead.mag.data(), + classifyScale->real.data(), + classifyScale->imag.data(), + b.b0min); } - - cd->outbuf->write(mixptr, outhop); - - for (auto &it : cd->scales) { - int fftSize = it.first; - auto &scale = it.second; - double *accptr = scale->accumulator.data(); - - int n = scale->accumulator.size() - outhop; - v_move(accptr, accptr + outhop, n); - v_zero(accptr + n, outhop); + + v_cartesian_to_polar(readahead.mag.data() + b.b0min, + readahead.phase.data() + b.b0min, + classifyScale->real.data() + b.b0min, + classifyScale->imag.data() + b.b0min, + b.b1max - b.b0min); + + if (b.b1max < classify/2 + 1) { + v_cartesian_to_magnitudes + (readahead.mag.data() + b.b1max, + classifyScale->real.data() + b.b1max, + classifyScale->imag.data() + b.b1max, + classify/2 + 1 - b.b1max); } + + v_scale(classifyScale->mag.data(), + 1.0 / double(classify), + classifyScale->mag.size()); + break; + } + } + + // For the others we operate directly in the scale data + // and restrict the range for cartesian-polar conversion - if (readSpace < m_inhop) { - // This should happen only when draining - cd->inbuf->skip(readSpace); - } else { - cd->inbuf->skip(m_inhop); + for (auto &it: cd->scales) { + int fftSize = it.first; + if (fftSize == classify) continue; + auto &scale = it.second; + + v_fftshift(scale->timeDomain.data(), fftSize); + + m_scaleData.at(fftSize)->fft.forward(scale->timeDomain.data(), + scale->real.data(), + scale->imag.data()); + + //!!! This should be a map + for (const auto &b : m_guideConfiguration.fftBandLimits) { + if (b.fftSize == fftSize) { + v_cartesian_to_polar(scale->mag.data() + b.b0min, + scale->phase.data() + b.b0min, + scale->real.data() + b.b0min, + scale->imag.data() + b.b0min, + b.b1max - b.b0min); + v_scale(scale->mag.data() + b.b0min, + 1.0 / double(fftSize), + b.b1max - b.b0min); + break; } } } + + // Use the classification scale to get a bin segmentation + // and calculate the adaptive frequency guide for this + // channel + cd->prevSegmentation = cd->segmentation; + cd->segmentation = cd->nextSegmentation; + cd->nextSegmentation = cd->segmenter->segment(readahead.mag.data()); + + m_troughPicker.findNearestAndNextPeaks + (classifyScale->mag.data(), 3, nullptr, + classifyScale->troughs.data()); + + double instantaneousRatio = double(prevOuthop) / double(m_inhop); + m_guide.calculate(instantaneousRatio, + classifyScale->mag.data(), + classifyScale->troughs.data(), + classifyScale->prevMag.data(), + cd->segmentation, + cd->prevSegmentation, + cd->nextSegmentation, + cd->guidance); } +void +R3StretcherImpl::synthesiseChannel(int c, int outhop) +{ + int longest = m_guideConfiguration.longestFftSize; + + auto &cd = m_channelData.at(c); + + for (auto &it : cd->scales) { + auto &scale = it.second; + int bufSize = scale->bufSize; + // copy to prevMag before filtering + v_copy(scale->prevMag.data(), + scale->mag.data(), + bufSize); + } + + for (const auto &band : cd->guidance.fftBands) { + int fftSize = band.fftSize; + auto &scale = cd->scales.at(fftSize); + auto &scaleData = m_scaleData.at(fftSize); + + //!!! messy and slow, but leave it until we've + //!!! discovered whether we need a window accumulator + //!!! (we probably do) + int analysisWindowSize = scaleData->analysisWindow.getSize(); + int synthesisWindowSize = scaleData->synthesisWindow.getSize(); + int offset = (analysisWindowSize - synthesisWindowSize) / 2; + double winscale = 0.0; + for (int i = 0; i < synthesisWindowSize; ++i) { + winscale += scaleData->analysisWindow.getValue(i + offset) * + scaleData->synthesisWindow.getValue(i); + } + winscale = double(outhop) / winscale; + + // The frequency filter is applied naively in the + // frequency domain. Aliasing is reduced by the + // shorter resynthesis window + + double factor = m_parameters.sampleRate / double(fftSize); + for (int i = 0; i < fftSize/2 + 1; ++i) { + double f = double(i) * factor; + if (f >= band.f0 && f < band.f1) { + //!!! check the mod 2 bit from stretch-fn + scale->mag[i] *= winscale; + } else { + scale->mag[i] = 0.f; + } + } + } + + // Resynthesise each FFT size (scale) individually, then + // sum. This is easier to manage scaling for in situations + // with a varying resynthesis hop + + for (auto &it : cd->scales) { + int fftSize = it.first; + auto &scale = it.second; + auto &scaleData = m_scaleData.at(fftSize); + + for (const auto &b : m_guideConfiguration.fftBandLimits) { + if (b.fftSize == fftSize) { + int offset = b.b0min; + v_zero(scale->real.data(), fftSize/2 + 1); + v_zero(scale->imag.data(), fftSize/2 + 1); + v_polar_to_cartesian + (scale->real.data() + offset, + scale->imag.data() + offset, + scale->mag.data() + offset, + scale->advancedPhase.data() + offset, + b.b1max - offset); + break; + } + } + + scaleData->fft.inverse(scale->real.data(), + scale->imag.data(), + scale->timeDomain.data()); + + v_fftshift(scale->timeDomain.data(), fftSize); + + // Synthesis window is shorter than analysis window, + // so copy and cut only from the middle of the + // time-domain frame; and the accumulator length + // always matches the longest FFT size, so as to make + // mixing straightforward, so there is an additional + // offset needed for the target + + int synthesisWindowSize = scaleData->synthesisWindow.getSize(); + int fromOffset = (fftSize - synthesisWindowSize) / 2; + int toOffset = (longest - synthesisWindowSize) / 2; + + scaleData->synthesisWindow.cutAndAdd + (scale->timeDomain.data() + fromOffset, + scale->accumulator.data() + toOffset); + } + + // Mix and emit this channel + + double *mixptr = cd->mixdown.data(); + v_zero(mixptr, outhop); + + for (auto &it : cd->scales) { + auto &scale = it.second; + v_add(mixptr, scale->accumulator.data(), outhop); + } + + cd->outbuf->write(mixptr, outhop); + + for (auto &it : cd->scales) { + int fftSize = it.first; + auto &scale = it.second; + double *accptr = scale->accumulator.data(); + + int n = scale->accumulator.size() - outhop; + v_move(accptr, accptr + outhop, n); + v_zero(accptr + n, outhop); + } + + int readSpace = cd->inbuf->getReadSpace(); + if (readSpace < m_inhop) { + // This should happen only when draining + cd->inbuf->skip(readSpace); + } else { + cd->inbuf->skip(m_inhop); + } +} } diff --git a/src/finer/R3StretcherImpl.h b/src/finer/R3StretcherImpl.h index 1cc9d67..9cd349e 100644 --- a/src/finer/R3StretcherImpl.h +++ b/src/finer/R3StretcherImpl.h @@ -248,6 +248,8 @@ protected: void consume(); void calculateHop(); + void analyseChannel(int channel, int prevOuthop); + void synthesiseChannel(int channel, int outhop); double getEffectiveRatio() const { return m_timeRatio * m_pitchScale;