Pull out per-channel analysis and resynthesis functions

2022-05-25 11:26:16 +01:00
parent 47476b9088
commit f5b381e086
2 changed files with 279 additions and 265 deletions
--- a/src/finer/R3StretcherImpl.cpp
+++ b/src/finer/R3StretcherImpl.cpp
@@ -179,9 +179,7 @@ void
 R3StretcherImpl::consume()
 {
    double ratio = getEffectiveRatio();
    int longest = m_guideConfiguration.longestFftSize;
    int classify = m_guideConfiguration.classificationFftSize;
    m_calculator->setDebugLevel(3);
@@ -194,11 +192,14 @@ R3StretcherImpl::consume()
    std::cout << "outhop = " << outhop << std::endl;
    double instantaneousRatio = double(m_prevOuthop) / double(m_inhop);
    m_prevOuthop = outhop;
    while (m_channelData.at(0)->outbuf->getWriteSpace() >= outhop) {
        // NB our ChannelData, ScaleData, and ChannelScaleData maps
        // contain shared_ptrs; whenever we retain one of them in a
        // variable, we do so by reference to avoid copying the
        // shared_ptr (as that is not realtime safe). Same goes for
        // the map iterators
        int readSpace = m_channelData.at(0)->inbuf->getReadSpace();
        if (readSpace < longest) {
            if (m_draining) {
@@ -210,158 +211,10 @@ R3StretcherImpl::consume()
            }
        }
-        // Analysis. This is per channel
+        // Analysis
        for (int c = 0; c < m_parameters.channels; ++c) {
-
+            analyseChannel(c, m_prevOuthop);
            // Our ChannelData, ScaleData, and ChannelScaleData maps
            // contain shared_ptrs; whenever we retain one of them in
            // a variable here, we do so by reference to avoid copying
            // the shared_ptr (as that is not realtime safe). Same
            // goes for the map iterators
            auto &cd = m_channelData.at(c);
            auto &longestScale = cd->scales.at(longest);
            double *buf = longestScale->timeDomain.data();
            if (readSpace < longest) {
                cd->inbuf->peek(buf, readSpace);
                v_zero(buf + readSpace, longest - readSpace);
            } else {
                cd->inbuf->peek(buf, longest);
            }
            // We have a single unwindowed frame at the longest FFT
            // size ("scale"). Populate the shorter FFT sizes from the
            // centre of it, windowing as we copy. The classification
            // scale is handled separately because it has readahead,
            // so skip it here as well as the longest. (In practice
            // this means we are probably only populating one scale)
            for (auto &it: cd->scales) {
                int fftSize = it.first;
                if (fftSize == classify || fftSize == longest) continue;
                int offset = (longest - fftSize) / 2;
                m_scaleData.at(fftSize)->analysisWindow.cut
                    (buf + offset, it.second->timeDomain.data());
            }
            // The classification scale has a one-hop readahead (note
            // that inhop is fixed), so populate its current data from
            // the readahead and the readahead from further down the
            // long unwindowed frame.
            auto &classifyScale = cd->scales.at(classify);
            ClassificationReadaheadData &readahead = cd->readahead;
            m_scaleData.at(classify)->analysisWindow.cut
                (buf + (longest - classify) / 2 + m_inhop,
                 readahead.timeDomain.data());
            // Finally window the longest scale
            m_scaleData.at(longest)->analysisWindow.cut(buf);
            // FFT shift, forward FFT, and carry out cartesian-polar
            // conversion for each FFT size.
            // For the classification scale we need magnitudes for the
            // full range (polar only in a subset) and we operate in
            // the readahead, pulling current values from the existing
            // readahead
            v_fftshift(readahead.timeDomain.data(), classify);
            v_copy(classifyScale->mag.data(),
                   readahead.mag.data(),
                   classifyScale->bufSize);
            v_copy(classifyScale->phase.data(),
                   readahead.phase.data(),
                   classifyScale->bufSize);
            m_scaleData.at(classify)->fft.forward(readahead.timeDomain.data(),
                                                  classifyScale->real.data(),
                                                  classifyScale->imag.data());
            for (const auto &b : m_guideConfiguration.fftBandLimits) {
                if (b.fftSize == classify) {
                    if (b.b0min > 0) {
                        v_cartesian_to_magnitudes(readahead.mag.data(),
                                                  classifyScale->real.data(),
                                                  classifyScale->imag.data(),
                                                  b.b0min);
                    }
                    v_cartesian_to_polar(readahead.mag.data() + b.b0min,
                                         readahead.phase.data() + b.b0min,
                                         classifyScale->real.data() + b.b0min,
                                         classifyScale->imag.data() + b.b0min,
                                         b.b1max - b.b0min);
                    if (b.b1max < classify/2 + 1) {
                        v_cartesian_to_magnitudes
                            (readahead.mag.data() + b.b1max,
                             classifyScale->real.data() + b.b1max,
                             classifyScale->imag.data() + b.b1max,
                             classify/2 + 1 - b.b1max);
                    }
                    v_scale(classifyScale->mag.data(),
                            1.0 / double(classify),
                            classifyScale->mag.size());
                    break;
                }
            }
            // For the others we operate directly in the scale data
            // and restrict the range for cartesian-polar conversion
            for (auto &it: cd->scales) {
                int fftSize = it.first;
                if (fftSize == classify) continue;
                auto &scale = it.second;
                v_fftshift(scale->timeDomain.data(), fftSize);
                m_scaleData.at(fftSize)->fft.forward(scale->timeDomain.data(),
                                                     scale->real.data(),
                                                     scale->imag.data());
                //!!! This should be a map
                for (const auto &b : m_guideConfiguration.fftBandLimits) {
                    if (b.fftSize == fftSize) {
                        v_cartesian_to_polar(scale->mag.data() + b.b0min,
                                             scale->phase.data() + b.b0min,
                                             scale->real.data() + b.b0min,
                                             scale->imag.data() + b.b0min,
                                             b.b1max - b.b0min);
                        v_scale(scale->mag.data() + b.b0min,
                                1.0 / double(fftSize),
                                b.b1max - b.b0min);
                        break;
                    }
                }
            }
            // Use the classification scale to get a bin segmentation
            // and calculate the adaptive frequency guide for this
            // channel
            cd->prevSegmentation = cd->segmentation;
            cd->segmentation = cd->nextSegmentation;
            cd->nextSegmentation = cd->segmenter->segment(readahead.mag.data());
            m_troughPicker.findNearestAndNextPeaks
                (classifyScale->mag.data(), 3, nullptr,
                 classifyScale->troughs.data());
            m_guide.calculate(instantaneousRatio,
                              classifyScale->mag.data(),
                              classifyScale->troughs.data(),
                              classifyScale->prevMag.data(),
                              cd->segmentation,
                              cd->prevSegmentation,
                              cd->nextSegmentation,
                              cd->guidance);
        }
        // Phase update. This is synchronised across all channels
@@ -370,11 +223,11 @@ R3StretcherImpl::consume()
            int fftSize = it.first;
            for (int c = 0; c < m_parameters.channels; ++c) {
                auto &cd = m_channelData.at(c);
-                auto &classifyScale = cd->scales.at(fftSize);
+                auto &scale = cd->scales.at(fftSize);
-                m_channelAssembly.mag[c] = classifyScale->mag.data();
+                m_channelAssembly.mag[c] = scale->mag.data();
-                m_channelAssembly.phase[c] = classifyScale->phase.data();
+                m_channelAssembly.phase[c] = scale->phase.data();
                m_channelAssembly.guidance[c] = &cd->guidance;
-                m_channelAssembly.outPhase[c] = classifyScale->advancedPhase.data();
+                m_channelAssembly.outPhase[c] = scale->advancedPhase.data();
            }
            m_scaleData.at(fftSize)->guided.advance
                (m_channelAssembly.outPhase.data(),
@@ -386,134 +239,293 @@ R3StretcherImpl::consume()
                 outhop);
        }
-        // Resynthesis. This is per channel
+        // Resynthesis
        for (int c = 0; c < m_parameters.channels; ++c) {
            synthesiseChannel(c, outhop);
        }
    }
-            auto &cd = m_channelData.at(c);
+    m_prevOuthop = outhop;
 }
-            for (auto &it : cd->scales) {
+void
-                auto &scale = it.second;
+R3StretcherImpl::analyseChannel(int c, int prevOuthop)
-                int bufSize = scale->bufSize;
+{
-                // copy to prevMag before filtering
+    int longest = m_guideConfiguration.longestFftSize;
-                v_copy(scale->prevMag.data(),
+    int classify = m_guideConfiguration.classificationFftSize;
                       scale->mag.data(),
                       bufSize);
            }
            for (const auto &band : cd->guidance.fftBands) {
                int fftSize = band.fftSize;
                auto &scale = cd->scales.at(fftSize);
                auto &scaleData = m_scaleData.at(fftSize);
-                //!!! messy and slow, but leave it until we've
+    auto &cd = m_channelData.at(c);
-                //!!! discovered whether we need a window accumulator
+    double *buf = cd->scales.at(longest)->timeDomain.data();
                //!!! (we probably do)
                int analysisWindowSize = scaleData->analysisWindow.getSize();
                int synthesisWindowSize = scaleData->synthesisWindow.getSize();
                int offset = (analysisWindowSize - synthesisWindowSize) / 2;
                double winscale = 0.0;
                for (int i = 0; i < synthesisWindowSize; ++i) {
                    winscale += scaleData->analysisWindow.getValue(i + offset) *
                        scaleData->synthesisWindow.getValue(i);
                }
                winscale = double(outhop) / winscale;
-                // The frequency filter is applied naively in the
+    int readSpace = cd->inbuf->getReadSpace();
-                // frequency domain. Aliasing is reduced by the
+    if (readSpace < longest) {
-                // shorter resynthesis window
+        cd->inbuf->peek(buf, readSpace);
-                
+        v_zero(buf + readSpace, longest - readSpace);
-                double factor = m_parameters.sampleRate / double(fftSize);
+    } else {
-                for (int i = 0; i < fftSize/2 + 1; ++i) {
+        cd->inbuf->peek(buf, longest);
-                    double f = double(i) * factor;
+    }
-                    if (f >= band.f0 && f < band.f1) {
+    
-                        //!!! check the mod 2 bit from stretch-fn
+    // We have a single unwindowed frame at the longest FFT
-                        scale->mag[i] *= winscale;
+    // size ("scale"). Populate the shorter FFT sizes from the
-                    } else {
+    // centre of it, windowing as we copy. The classification
-                        scale->mag[i] = 0.f;
+    // scale is handled separately because it has readahead,
-                    }
+    // so skip it here as well as the longest. (In practice
-                }
+    // this means we are probably only populating one scale)
            }
-            // Resynthesise each FFT size (scale) individually, then
+    for (auto &it: cd->scales) {
-            // sum. This is easier to manage scaling for in situations
+        int fftSize = it.first;
-            // with a varying resynthesis hop
+        if (fftSize == classify || fftSize == longest) continue;
        int offset = (longest - fftSize) / 2;
        m_scaleData.at(fftSize)->analysisWindow.cut
            (buf + offset, it.second->timeDomain.data());
    }
    // The classification scale has a one-hop readahead (note
    // that inhop is fixed), so populate its current data from
    // the readahead and the readahead from further down the
    // long unwindowed frame.
    auto &classifyScale = cd->scales.at(classify);
    ClassificationReadaheadData &readahead = cd->readahead;
    m_scaleData.at(classify)->analysisWindow.cut
        (buf + (longest - classify) / 2 + m_inhop,
         readahead.timeDomain.data());
-            for (auto &it : cd->scales) {
+    // Finally window the longest scale
-                int fftSize = it.first;
+    m_scaleData.at(longest)->analysisWindow.cut(buf);
                auto &scale = it.second;
                auto &scaleData = m_scaleData.at(fftSize);
                for (const auto &b : m_guideConfiguration.fftBandLimits) {
                    if (b.fftSize == fftSize) {
                        int offset = b.b0min;
                        v_zero(scale->real.data(), fftSize/2 + 1);
                        v_zero(scale->imag.data(), fftSize/2 + 1);
                        v_polar_to_cartesian
                            (scale->real.data() + offset,
                             scale->imag.data() + offset,
                             scale->mag.data() + offset,
                             scale->advancedPhase.data() + offset,
                             b.b1max - offset);
                        break;
                    }
                }
-                scaleData->fft.inverse(scale->real.data(),
+    // FFT shift, forward FFT, and carry out cartesian-polar
-                                       scale->imag.data(),
+    // conversion for each FFT size.
                                       scale->timeDomain.data());
-                v_fftshift(scale->timeDomain.data(), fftSize);
+    // For the classification scale we need magnitudes for the
    // full range (polar only in a subset) and we operate in
    // the readahead, pulling current values from the existing
    // readahead
-                // Synthesis window is shorter than analysis window,
+    v_fftshift(readahead.timeDomain.data(), classify);
                // so copy and cut only from the middle of the
                // time-domain frame; and the accumulator length
                // always matches the longest FFT size, so as to make
                // mixing straightforward, so there is an additional
                // offset needed for the target
                int synthesisWindowSize = scaleData->synthesisWindow.getSize();
                int fromOffset = (fftSize - synthesisWindowSize) / 2;
                int toOffset = (m_guideConfiguration.longestFftSize -
                                synthesisWindowSize) / 2;
-                scaleData->synthesisWindow.cutAndAdd
+    v_copy(classifyScale->mag.data(),
-                    (scale->timeDomain.data() + fromOffset,
+           readahead.mag.data(),
-                     scale->accumulator.data() + toOffset);
+           classifyScale->bufSize);
            }
            // Mix and emit this channel
-            double *mixptr = cd->mixdown.data();
+    v_copy(classifyScale->phase.data(),
-            v_zero(mixptr, outhop);
+           readahead.phase.data(),
           classifyScale->bufSize);
-            for (auto &it : cd->scales) {
+    m_scaleData.at(classify)->fft.forward(readahead.timeDomain.data(),
-                auto &scale = it.second;
+                                          classifyScale->real.data(),
-                v_add(mixptr, scale->accumulator.data(), outhop);
+                                          classifyScale->imag.data());
    for (const auto &b : m_guideConfiguration.fftBandLimits) {
        if (b.fftSize == classify) {
            if (b.b0min > 0) {
                v_cartesian_to_magnitudes(readahead.mag.data(),
                                          classifyScale->real.data(),
                                          classifyScale->imag.data(),
                                          b.b0min);
            }
-
+                    
-            cd->outbuf->write(mixptr, outhop);
+            v_cartesian_to_polar(readahead.mag.data() + b.b0min,
-
+                                 readahead.phase.data() + b.b0min,
-            for (auto &it : cd->scales) {
+                                 classifyScale->real.data() + b.b0min,
-                int fftSize = it.first;
+                                 classifyScale->imag.data() + b.b0min,
-                auto &scale = it.second;
+                                 b.b1max - b.b0min);
-                double *accptr = scale->accumulator.data();
+                    
-
+            if (b.b1max < classify/2 + 1) {
-                int n = scale->accumulator.size() - outhop;
+                v_cartesian_to_magnitudes
-                v_move(accptr, accptr + outhop, n);
+                    (readahead.mag.data() + b.b1max,
-                v_zero(accptr + n, outhop);
+                     classifyScale->real.data() + b.b1max,
                     classifyScale->imag.data() + b.b1max,
                     classify/2 + 1 - b.b1max);
            }
            v_scale(classifyScale->mag.data(),
                    1.0 / double(classify),
                    classifyScale->mag.size());
            break;
        }
    }
    // For the others we operate directly in the scale data
    // and restrict the range for cartesian-polar conversion
-            if (readSpace < m_inhop) {
+    for (auto &it: cd->scales) {
-                // This should happen only when draining
+        int fftSize = it.first;
-                cd->inbuf->skip(readSpace);
+        if (fftSize == classify) continue;
-            } else {
+        auto &scale = it.second;
-                cd->inbuf->skip(m_inhop);
+
        v_fftshift(scale->timeDomain.data(), fftSize);
        m_scaleData.at(fftSize)->fft.forward(scale->timeDomain.data(),
                                             scale->real.data(),
                                             scale->imag.data());
        //!!! This should be a map
        for (const auto &b : m_guideConfiguration.fftBandLimits) {
            if (b.fftSize == fftSize) {
                v_cartesian_to_polar(scale->mag.data() + b.b0min,
                                     scale->phase.data() + b.b0min,
                                     scale->real.data() + b.b0min,
                                     scale->imag.data() + b.b0min,
                                     b.b1max - b.b0min);
                v_scale(scale->mag.data() + b.b0min,
                        1.0 / double(fftSize),
                        b.b1max - b.b0min);
                break;
            }
        }
    }
    // Use the classification scale to get a bin segmentation
    // and calculate the adaptive frequency guide for this
    // channel
    cd->prevSegmentation = cd->segmentation;
    cd->segmentation = cd->nextSegmentation;
    cd->nextSegmentation = cd->segmenter->segment(readahead.mag.data());
    m_troughPicker.findNearestAndNextPeaks
        (classifyScale->mag.data(), 3, nullptr,
         classifyScale->troughs.data());
    double instantaneousRatio = double(prevOuthop) / double(m_inhop);
    m_guide.calculate(instantaneousRatio,
                      classifyScale->mag.data(),
                      classifyScale->troughs.data(),
                      classifyScale->prevMag.data(),
                      cd->segmentation,
                      cd->prevSegmentation,
                      cd->nextSegmentation,
                      cd->guidance);
 }
 void
 R3StretcherImpl::synthesiseChannel(int c, int outhop)
 {
    int longest = m_guideConfiguration.longestFftSize;
    auto &cd = m_channelData.at(c);
    for (auto &it : cd->scales) {
        auto &scale = it.second;
        int bufSize = scale->bufSize;
        // copy to prevMag before filtering
        v_copy(scale->prevMag.data(),
               scale->mag.data(),
               bufSize);
    }
    for (const auto &band : cd->guidance.fftBands) {
        int fftSize = band.fftSize;
        auto &scale = cd->scales.at(fftSize);
        auto &scaleData = m_scaleData.at(fftSize);
        //!!! messy and slow, but leave it until we've
        //!!! discovered whether we need a window accumulator
        //!!! (we probably do)
        int analysisWindowSize = scaleData->analysisWindow.getSize();
        int synthesisWindowSize = scaleData->synthesisWindow.getSize();
        int offset = (analysisWindowSize - synthesisWindowSize) / 2;
        double winscale = 0.0;
        for (int i = 0; i < synthesisWindowSize; ++i) {
            winscale += scaleData->analysisWindow.getValue(i + offset) *
                scaleData->synthesisWindow.getValue(i);
        }
        winscale = double(outhop) / winscale;
        // The frequency filter is applied naively in the
        // frequency domain. Aliasing is reduced by the
        // shorter resynthesis window
        double factor = m_parameters.sampleRate / double(fftSize);
        for (int i = 0; i < fftSize/2 + 1; ++i) {
            double f = double(i) * factor;
            if (f >= band.f0 && f < band.f1) {
                //!!! check the mod 2 bit from stretch-fn
                scale->mag[i] *= winscale;
            } else {
                scale->mag[i] = 0.f;
            }
        }
    }
    // Resynthesise each FFT size (scale) individually, then
    // sum. This is easier to manage scaling for in situations
    // with a varying resynthesis hop
    for (auto &it : cd->scales) {
        int fftSize = it.first;
        auto &scale = it.second;
        auto &scaleData = m_scaleData.at(fftSize);
        for (const auto &b : m_guideConfiguration.fftBandLimits) {
            if (b.fftSize == fftSize) {
                int offset = b.b0min;
                v_zero(scale->real.data(), fftSize/2 + 1);
                v_zero(scale->imag.data(), fftSize/2 + 1);
                v_polar_to_cartesian
                    (scale->real.data() + offset,
                     scale->imag.data() + offset,
                     scale->mag.data() + offset,
                     scale->advancedPhase.data() + offset,
                     b.b1max - offset);
                break;
            }
        }
        scaleData->fft.inverse(scale->real.data(),
                               scale->imag.data(),
                               scale->timeDomain.data());
        v_fftshift(scale->timeDomain.data(), fftSize);
        // Synthesis window is shorter than analysis window,
        // so copy and cut only from the middle of the
        // time-domain frame; and the accumulator length
        // always matches the longest FFT size, so as to make
        // mixing straightforward, so there is an additional
        // offset needed for the target
        int synthesisWindowSize = scaleData->synthesisWindow.getSize();
        int fromOffset = (fftSize - synthesisWindowSize) / 2;
        int toOffset = (longest - synthesisWindowSize) / 2;
        scaleData->synthesisWindow.cutAndAdd
            (scale->timeDomain.data() + fromOffset,
             scale->accumulator.data() + toOffset);
    }
    // Mix and emit this channel
    double *mixptr = cd->mixdown.data();
    v_zero(mixptr, outhop);
    for (auto &it : cd->scales) {
        auto &scale = it.second;
        v_add(mixptr, scale->accumulator.data(), outhop);
    }
    cd->outbuf->write(mixptr, outhop);
    for (auto &it : cd->scales) {
        int fftSize = it.first;
        auto &scale = it.second;
        double *accptr = scale->accumulator.data();
        int n = scale->accumulator.size() - outhop;
        v_move(accptr, accptr + outhop, n);
        v_zero(accptr + n, outhop);
    }
    int readSpace = cd->inbuf->getReadSpace();
    if (readSpace < m_inhop) {
        // This should happen only when draining
        cd->inbuf->skip(readSpace);
    } else {
        cd->inbuf->skip(m_inhop);
    }
 }
 }
--- a/src/finer/R3StretcherImpl.h
+++ b/src/finer/R3StretcherImpl.h
@@ -248,6 +248,8 @@ protected:
    void consume();
    void calculateHop();
    void analyseChannel(int channel, int prevOuthop);
    void synthesiseChannel(int channel, int outhop);
    double getEffectiveRatio() const {
        return m_timeRatio * m_pitchScale;