From 7179c2626cbd901aa665af0ea56a69cfd3057277 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 14 Jul 2022 09:42:10 +0100 Subject: [PATCH 01/41] Update Profiler to use std::chrono --- src/common/Profiler.cpp | 32 ++++++-------------------------- src/common/Profiler.h | 23 ++++++----------------- src/common/sysutils.cpp | 16 ---------------- src/common/sysutils.h | 5 ----- 4 files changed, 12 insertions(+), 64 deletions(-) diff --git a/src/common/Profiler.cpp b/src/common/Profiler.cpp index a606cfa..142f0ee 100644 --- a/src/common/Profiler.cpp +++ b/src/common/Profiler.cpp @@ -50,7 +50,7 @@ Profiler::m_worstCalls; static Mutex profileMutex; void -Profiler::add(const char *id, float ms) +Profiler::add(const char *id, double ms) { profileMutex.lock(); @@ -95,7 +95,7 @@ Profiler::getReport() #endif report += buffer; - typedef std::multimap TimeRMap; + typedef std::multimap TimeRMap; typedef std::multimap IntRMap; TimeRMap totmap, avgmap, worstmap; IntRMap ncallmap; @@ -186,11 +186,7 @@ Profiler::Profiler(const char* c) : m_c(c), m_ended(false) { -#ifdef PROFILE_CLOCKS - m_start = clock(); -#else - (void)gettimeofday(&m_start, 0); -#endif + m_start = std::chrono::steady_clock::now(); } Profiler::~Profiler() @@ -201,25 +197,9 @@ Profiler::~Profiler() void Profiler::end() { -#ifdef PROFILE_CLOCKS - clock_t end = clock(); - clock_t elapsed = end - m_start; - float ms = float((double(elapsed) / double(CLOCKS_PER_SEC)) * 1000.0); -#else - struct timeval tv; - (void)gettimeofday(&tv, 0); - - tv.tv_sec -= m_start.tv_sec; - if (tv.tv_usec < m_start.tv_usec) { - tv.tv_usec += 1000000; - tv.tv_sec -= 1; - } - tv.tv_usec -= m_start.tv_usec; - float ms = float((double(tv.tv_sec) + (double(tv.tv_usec) / 1000000.0)) * 1000.0); -#endif - - add(m_c, ms); - + auto finish = std::chrono::steady_clock::now(); + std::chrono::duration ms = finish - m_start; + add(m_c, ms.count()); m_ended = true; } diff --git a/src/common/Profiler.h b/src/common/Profiler.h index 37ec04c..dbb923d 100644 
--- a/src/common/Profiler.h +++ b/src/common/Profiler.h @@ -39,14 +39,7 @@ #endif #ifndef NO_TIMING -#ifdef PROFILE_CLOCKS -#include -#else -#include "sysutils.h" -#ifndef _WIN32 -#include -#endif -#endif +#include #endif #ifndef NO_TIMING @@ -75,21 +68,17 @@ public: static std::string getReport(); protected: - const char* m_c; -#ifdef PROFILE_CLOCKS - clock_t m_start; -#else - struct timeval m_start; -#endif + const char *const m_c; + std::chrono::time_point m_start; bool m_showOnDestruct; bool m_ended; - typedef std::pair TimePair; + typedef std::pair TimePair; typedef std::map ProfileMap; - typedef std::map WorstCallMap; + typedef std::map WorstCallMap; static ProfileMap m_profiles; static WorstCallMap m_worstCalls; - static void add(const char *, float); + static void add(const char *, double); }; #else diff --git a/src/common/sysutils.cpp b/src/common/sysutils.cpp index 6f36ae0..0a13b7e 100644 --- a/src/common/sysutils.cpp +++ b/src/common/sysutils.cpp @@ -150,22 +150,6 @@ system_is_multiprocessor() return mp; } -#ifdef _WIN32 - -void gettimeofday(struct timeval *tv, void *tz) -{ - union { - long long ns100; - FILETIME ft; - } now; - - ::GetSystemTimeAsFileTime(&now.ft); - tv->tv_usec = (long)((now.ns100 / 10LL) % 1000000LL); - tv->tv_sec = (long)((now.ns100 - 116444736000000000LL) / 10000000LL); -} - -#endif - void system_specific_initialise() { #if defined HAVE_IPP diff --git a/src/common/sysutils.h b/src/common/sysutils.h index 9bcdd9a..c59e1bf 100644 --- a/src/common/sysutils.h +++ b/src/common/sysutils.h @@ -99,11 +99,6 @@ extern bool system_is_multiprocessor(); extern void system_specific_initialise(); extern void system_specific_application_initialise(); -#ifdef _WIN32 -struct timeval { long tv_sec; long tv_usec; }; -void gettimeofday(struct timeval *p, void *tz); -#endif // _WIN32 - } // end namespace // The following should be functions in the RubberBand namespace, really From dc74c993cf1d93dc7c870ec5523bf1b9951ff4e2 Mon Sep 17 00:00:00 2001 From: 
Chris Cannam Date: Thu, 14 Jul 2022 10:02:39 +0100 Subject: [PATCH 02/41] Add profile points; switch to microseconds --- src/common/Profiler.cpp | 88 +++++++++++++++++++------------------- src/faster/R2Stretcher.cpp | 2 + src/finer/BinClassifier.h | 3 ++ src/finer/BinSegmenter.h | 4 ++ src/finer/Guide.h | 3 ++ src/finer/PhaseAdvance.h | 3 ++ src/finer/R3Stretcher.cpp | 25 +++++++++++ 7 files changed, 85 insertions(+), 43 deletions(-) diff --git a/src/common/Profiler.cpp b/src/common/Profiler.cpp index 142f0ee..b0466e2 100644 --- a/src/common/Profiler.cpp +++ b/src/common/Profiler.cpp @@ -50,23 +50,23 @@ Profiler::m_worstCalls; static Mutex profileMutex; void -Profiler::add(const char *id, double ms) +Profiler::add(const char *id, double us) { profileMutex.lock(); ProfileMap::iterator pmi = m_profiles.find(id); if (pmi != m_profiles.end()) { ++pmi->second.first; - pmi->second.second += ms; + pmi->second.second += us; } else { - m_profiles[id] = TimePair(1, ms); + m_profiles[id] = TimePair(1, us); } WorstCallMap::iterator wci = m_worstCalls.find(id); if (wci != m_worstCalls.end()) { - if (ms > wci->second) wci->second = ms; + if (us > wci->second) wci->second = us; } else { - m_worstCalls[id] = ms; + m_worstCalls[id] = us; } profileMutex.unlock(); @@ -100,6 +100,8 @@ Profiler::getReport() TimeRMap totmap, avgmap, worstmap; IntRMap ncallmap; + const unsigned char mu_s[] = { 0xce, 0xbc, 's', 0x0 }; + for (ProfileMap::const_iterator i = m_profiles.begin(); i != m_profiles.end(); ++i) { totmap.insert(TimeRMap::value_type(i->second.second, i->first)); @@ -113,38 +115,6 @@ Profiler::getReport() worstmap.insert(TimeRMap::value_type(i->second, i->first)); } - snprintf(buffer, buflen, "\nBy total:\n"); - report += buffer; - for (TimeRMap::const_iterator i = totmap.end(); i != totmap.begin(); ) { - --i; - snprintf(buffer, buflen, "%-40s %f ms\n", i->second, i->first); - report += buffer; - } - - snprintf(buffer, buflen, "\nBy average:\n"); - report += buffer; - for 
(TimeRMap::const_iterator i = avgmap.end(); i != avgmap.begin(); ) { - --i; - snprintf(buffer, buflen, "%-40s %f ms\n", i->second, i->first); - report += buffer; - } - - snprintf(buffer, buflen, "\nBy worst case:\n"); - report += buffer; - for (TimeRMap::const_iterator i = worstmap.end(); i != worstmap.begin(); ) { - --i; - snprintf(buffer, buflen, "%-40s %f ms\n", i->second, i->first); - report += buffer; - } - - snprintf(buffer, buflen, "\nBy number of calls:\n"); - report += buffer; - for (IntRMap::const_iterator i = ncallmap.end(); i != ncallmap.begin(); ) { - --i; - snprintf(buffer, buflen, "%-40s %d\n", i->second, i->first); - report += buffer; - } - snprintf(buffer, buflen, "\nBy name:\n"); report += buffer; @@ -165,15 +135,47 @@ Profiler::getReport() const TimePair &pp(j->second); snprintf(buffer, buflen, "%s(%d):\n", *i, pp.first); report += buffer; - snprintf(buffer, buflen, "\tReal: \t%f ms \t[%f ms total]\n", - (pp.second / pp.first), - (pp.second)); + snprintf(buffer, buflen, "\tReal: \t%12f %s \t[%f %s total]\n", + (pp.second / pp.first), mu_s, + (pp.second), mu_s); report += buffer; WorstCallMap::const_iterator k = m_worstCalls.find(*i); if (k == m_worstCalls.end()) continue; - snprintf(buffer, buflen, "\tWorst:\t%f ms/call\n", k->second); + snprintf(buffer, buflen, "\tWorst:\t%14f %s/call\n", k->second, mu_s); + report += buffer; + } + + snprintf(buffer, buflen, "\nBy total:\n"); + report += buffer; + for (TimeRMap::const_iterator i = totmap.end(); i != totmap.begin(); ) { + --i; + snprintf(buffer, buflen, "%-40s %14f %s\n", i->second, i->first, mu_s); + report += buffer; + } + + snprintf(buffer, buflen, "\nBy average:\n"); + report += buffer; + for (TimeRMap::const_iterator i = avgmap.end(); i != avgmap.begin(); ) { + --i; + snprintf(buffer, buflen, "%-40s %14f %s\n", i->second, i->first, mu_s); + report += buffer; + } + + snprintf(buffer, buflen, "\nBy worst case:\n"); + report += buffer; + for (TimeRMap::const_iterator i = worstmap.end(); i != 
worstmap.begin(); ) { + --i; + snprintf(buffer, buflen, "%-40s %14f %s\n", i->second, i->first, mu_s); + report += buffer; + } + + snprintf(buffer, buflen, "\nBy number of calls:\n"); + report += buffer; + for (IntRMap::const_iterator i = ncallmap.end(); i != ncallmap.begin(); ) { + --i; + snprintf(buffer, buflen, "%-40s %14d\n", i->second, i->first); report += buffer; } @@ -198,8 +200,8 @@ void Profiler::end() { auto finish = std::chrono::steady_clock::now(); - std::chrono::duration ms = finish - m_start; - add(m_c, ms.count()); + std::chrono::duration us = finish - m_start; + add(m_c, us.count()); m_ended = true; } diff --git a/src/faster/R2Stretcher.cpp b/src/faster/R2Stretcher.cpp index 4546895..4033c11 100644 --- a/src/faster/R2Stretcher.cpp +++ b/src/faster/R2Stretcher.cpp @@ -101,6 +101,8 @@ R2Stretcher::R2Stretcher(size_t sampleRate, m_freq2(12000), m_baseFftSize(m_defaultFftSize) { + Profiler profiler("R2Stretcher::R2Stretcher"); + if (!_initialised) { system_specific_initialise(); _initialised = true; diff --git a/src/finer/BinClassifier.h b/src/finer/BinClassifier.h index 2c58d29..d0431cb 100644 --- a/src/finer/BinClassifier.h +++ b/src/finer/BinClassifier.h @@ -27,6 +27,7 @@ #include "../common/Allocators.h" #include "../common/MovingMedian.h" #include "../common/RingBuffer.h" +#include "../common/Profiler.h" #include #include @@ -97,6 +98,8 @@ public: void classify(const process_t *const mag, // input, of at least binCount bins Classification *classification) // output, of binCount bins { + Profiler profiler("BinClassifier::classify"); + const int n = m_parameters.binCount; for (int i = 0; i < n; ++i) { diff --git a/src/finer/BinSegmenter.h b/src/finer/BinSegmenter.h index 705d255..90c29b7 100644 --- a/src/finer/BinSegmenter.h +++ b/src/finer/BinSegmenter.h @@ -28,6 +28,7 @@ #include "../common/HistogramFilter.h" #include "../common/mathmisc.h" +#include "../common/Profiler.h" #include @@ -65,6 +66,9 @@ public: } Segmentation segment(const 
BinClassifier::Classification *classification) { + + Profiler profiler("BinSegmenter::segment"); + int n = m_parameters.binCount; for (int i = 0; i < n; ++i) { switch (classification[i]) { diff --git a/src/finer/Guide.h b/src/finer/Guide.h index 108fd27..f14b24b 100644 --- a/src/finer/Guide.h +++ b/src/finer/Guide.h @@ -25,6 +25,7 @@ #define RUBBERBAND_GUIDE_H #include "../common/Log.h" +#include "../common/Profiler.h" #include #include @@ -159,6 +160,8 @@ public: bool tighterChannelLock, Guidance &guidance) const { + Profiler profiler("Guide::updateGuidance"); + bool hadPhaseReset = guidance.phaseReset.present; guidance.phaseReset.present = false; diff --git a/src/finer/PhaseAdvance.h b/src/finer/PhaseAdvance.h index aec35b9..ba200bb 100644 --- a/src/finer/PhaseAdvance.h +++ b/src/finer/PhaseAdvance.h @@ -28,6 +28,7 @@ #include "../common/Log.h" #include "../common/mathmisc.h" +#include "../common/Profiler.h" #include #include @@ -93,6 +94,8 @@ public: int inhop, int outhop) { + Profiler profiler("GuidedPhaseAdvance::advance"); + int myFftBand = 0; int i = 0; for (const auto &fband : guidance[0]->fftBands) { diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index dd2f947..f7be7bc 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -24,6 +24,7 @@ #include "R3Stretcher.h" #include "../common/VectorOpsComplex.h" +#include "../common/Profiler.h" #include @@ -54,6 +55,8 @@ R3Stretcher::R3Stretcher(Parameters parameters, m_totalOutputDuration(0), m_mode(ProcessMode::JustCreated) { + Profiler profiler("R3Stretcher::R3Stretcher"); + m_log.log(1, "R3Stretcher::R3Stretcher: rate, options", m_parameters.sampleRate, m_parameters.options); m_log.log(1, "R3Stretcher::R3Stretcher: initial time ratio and pitch scale", @@ -233,6 +236,8 @@ R3Stretcher::setKeyFrameMap(const std::map &mapping) void R3Stretcher::createResampler() { + Profiler profiler("R3Stretcher::createResampler"); + Resampler::Parameters resamplerParameters; if 
(m_parameters.options & RubberBandStretcher::OptionPitchHighQuality) { @@ -463,6 +468,8 @@ R3Stretcher::reset() void R3Stretcher::study(const float *const *, size_t samples, bool) { + Profiler profiler("R3Stretcher::study"); + if (isRealTime()) { m_log.log(0, "R3Stretcher::study: Not meaningful in realtime mode"); return; @@ -520,6 +527,8 @@ R3Stretcher::setMaxProcessSize(size_t n) void R3Stretcher::process(const float *const *input, size_t samples, bool final) { + Profiler profiler("R3Stretcher::process"); + if (m_mode == ProcessMode::Finished) { m_log.log(0, "R3Stretcher::process: Cannot process again after final chunk"); return; @@ -615,6 +624,8 @@ R3Stretcher::available() const size_t R3Stretcher::retrieve(float *const *output, size_t samples) const { + Profiler profiler("R3Stretcher::retrieve"); + int got = samples; for (int c = 0; c < m_parameters.channels; ++c) { @@ -633,6 +644,8 @@ R3Stretcher::retrieve(float *const *output, size_t samples) const void R3Stretcher::consume() { + Profiler profiler("R3Stretcher::consume"); + int longest = m_guideConfiguration.longestFftSize; int channels = m_parameters.channels; int inhop = m_inhop; @@ -680,6 +693,8 @@ R3Stretcher::consume() while (cd0->outbuf->getWriteSpace() >= outhop) { + Profiler profiler("R3Stretcher::consume/loop"); + // NB our ChannelData, ScaleData, and ChannelScaleData maps // contain shared_ptrs; whenever we retain one of them in a // variable, we do so by reference to avoid copying the @@ -827,6 +842,8 @@ R3Stretcher::consume() void R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) { + Profiler profiler("R3Stretcher::analyseChannel"); + int longest = m_guideConfiguration.longestFftSize; int classify = m_guideConfiguration.classificationFftSize; @@ -1067,6 +1084,8 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) void R3Stretcher::analyseFormant(int c) { + Profiler profiler("R3Stretcher::analyseFormant"); + auto &cd = m_channelData.at(c); auto 
&f = *cd->formant; @@ -1101,6 +1120,8 @@ R3Stretcher::analyseFormant(int c) void R3Stretcher::adjustFormant(int c) { + Profiler profiler("R3Stretcher::adjustFormant"); + auto &cd = m_channelData.at(c); for (auto &it : cd->scales) { @@ -1135,6 +1156,8 @@ R3Stretcher::adjustFormant(int c) void R3Stretcher::adjustPreKick(int c) { + Profiler profiler("R3Stretcher::adjustPreKick"); + auto &cd = m_channelData.at(c); auto fftSize = cd->guidance.fftBands[0].fftSize; if (cd->guidance.preKick.present) { @@ -1166,6 +1189,8 @@ R3Stretcher::adjustPreKick(int c) void R3Stretcher::synthesiseChannel(int c, int outhop, bool draining) { + Profiler profiler("R3Stretcher::synthesiseChannel"); + int longest = m_guideConfiguration.longestFftSize; auto &cd = m_channelData.at(c); From 02928a3c86ef72e9c4f2c549abf57aff61ce8d32 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 14 Jul 2022 11:55:21 +0100 Subject: [PATCH 03/41] Experimentally (re-)introduce short window mode --- src/finer/Guide.h | 19 ++++++++- src/finer/PhaseAdvance.h | 7 +++- src/finer/R3Stretcher.cpp | 85 ++++++++++++++++++++++++++++++++------- src/finer/R3Stretcher.h | 24 +++++++---- 4 files changed, 109 insertions(+), 26 deletions(-) diff --git a/src/finer/Guide.h b/src/finer/Guide.h index f14b24b..9e5f5b2 100644 --- a/src/finer/Guide.h +++ b/src/finer/Guide.h @@ -105,7 +105,10 @@ public: struct Parameters { double sampleRate; - Parameters(double _sampleRate) : sampleRate(_sampleRate) { } + bool shortWindowMode; + Parameters(double _sampleRate, bool _shortWindow) : + sampleRate(_sampleRate), + shortWindowMode(_shortWindow) { } }; Guide(Parameters parameters, Log log) : @@ -120,7 +123,14 @@ public: { double rate = m_parameters.sampleRate; - m_log.log(1, "Guide: rate", rate); + m_log.log(1, "Guide: rate and short-window mode", + rate, m_parameters.shortWindowMode); + + if (m_parameters.shortWindowMode) { + m_minLower = 0.0; + m_defaultLower = 0.0; + m_maxLower = 0.0; + } int bandFftSize = roundUp(int(ceil(rate/16.0))); 
m_configuration.fftBandLimits[0] = @@ -301,6 +311,11 @@ public: guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio); guidance.phaseLockBands[3].f0 = higher; guidance.phaseLockBands[3].f1 = nyquist; + + if (m_parameters.shortWindowMode) { + guidance.phaseLockBands[1].p = 1; + guidance.phaseLockBands[2].p = 2; + } if (outhop > 256) { guidance.phaseLockBands[3].p = 3; diff --git a/src/finer/PhaseAdvance.h b/src/finer/PhaseAdvance.h index ba200bb..4474c9e 100644 --- a/src/finer/PhaseAdvance.h +++ b/src/finer/PhaseAdvance.h @@ -43,8 +43,11 @@ public: int fftSize; double sampleRate; int channels; - Parameters(int _fftSize, double _sampleRate, int _channels) : - fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels) { } + bool shortWindowMode; + Parameters(int _fftSize, double _sampleRate, int _channels, + bool _shortWindow) : + fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels), + shortWindowMode(_shortWindow) { } }; GuidedPhaseAdvance(Parameters parameters, Log log) : diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index f7be7bc..c65fb1a 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -39,7 +39,10 @@ R3Stretcher::R3Stretcher(Parameters parameters, m_timeRatio(initialTimeRatio), m_pitchScale(initialPitchScale), m_formantScale(0.0), - m_guide(Guide::Parameters(m_parameters.sampleRate), m_log), + m_guide(Guide::Parameters + (m_parameters.sampleRate, + m_parameters.options & RubberBandStretcher::OptionWindowShort), + m_log), m_guideConfiguration(m_guide.getConfiguration()), m_channelAssembly(m_parameters.channels), m_inhop(1), @@ -62,6 +65,16 @@ R3Stretcher::R3Stretcher(Parameters parameters, m_log.log(1, "R3Stretcher::R3Stretcher: initial time ratio and pitch scale", m_timeRatio, m_pitchScale); + if (isRealTime()) { + m_log.log(1, "R3Stretcher::R3Stretcher: real-time mode"); + } else { + m_log.log(1, "R3Stretcher::R3Stretcher: offline mode"); + } + + if (isShortWindowed()) { + m_log.log(1, 
"R3Stretcher::R3Stretcher: intermediate shorter-window mode requested"); + } + double maxClassifierFrequency = 16000.0; if (maxClassifierFrequency > m_parameters.sampleRate/2) { maxClassifierFrequency = m_parameters.sampleRate/2; @@ -98,7 +111,8 @@ R3Stretcher::R3Stretcher(Parameters parameters, for (auto band: m_guideConfiguration.fftBandLimits) { int fftSize = band.fftSize; GuidedPhaseAdvance::Parameters guidedParameters - (fftSize, m_parameters.sampleRate, m_parameters.channels); + (fftSize, m_parameters.sampleRate, m_parameters.channels, + isShortWindowed()); m_scaleData[fftSize] = std::make_shared (guidedParameters, m_log); } @@ -130,30 +144,44 @@ R3Stretcher::R3Stretcher(Parameters parameters, } WindowType -R3Stretcher::ScaleData::analysisWindowShape(int fftSize) +R3Stretcher::ScaleData::analysisWindowShape() { - if (fftSize > 2048) return HannWindow; - else return NiemitaloForwardWindow; + if (shortWindowMode) { + if (fftSize >= 2048) return HannWindow; + else return NiemitaloForwardWindow; + } else { + if (fftSize > 2048) return HannWindow; + else return NiemitaloForwardWindow; + } } int -R3Stretcher::ScaleData::analysisWindowLength(int fftSize) +R3Stretcher::ScaleData::analysisWindowLength() { return fftSize; } WindowType -R3Stretcher::ScaleData::synthesisWindowShape(int fftSize) +R3Stretcher::ScaleData::synthesisWindowShape() { - if (fftSize > 2048) return HannWindow; - else return NiemitaloReverseWindow; + if (shortWindowMode) { + if (fftSize >= 2048) return HannWindow; + else return NiemitaloReverseWindow; + } else { + if (fftSize > 2048) return HannWindow; + else return NiemitaloReverseWindow; + } } int -R3Stretcher::ScaleData::synthesisWindowLength(int fftSize) +R3Stretcher::ScaleData::synthesisWindowLength() { - if (fftSize > 2048) return fftSize/2; - else return fftSize; + if (shortWindowMode) { + return fftSize; + } else { + if (fftSize > 2048) return fftSize/2; + else return fftSize; + } } void @@ -292,6 +320,14 @@ R3Stretcher::calculateHop() if 
(proposedOuthop > 512.0) proposedOuthop = 512.0; if (proposedOuthop < 128.0) proposedOuthop = 128.0; + if (isShortWindowed()) { + // perhaps ironically, the short window mode actually uses a + // longer synthesis window for the 2048-bin FFT and, since + // reduced CPU consumption is the motivation, it can generally + // survive longer hops + proposedOuthop *= 1.5; + } + m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop); double inhop = proposedOuthop / ratio; @@ -728,6 +764,10 @@ R3Stretcher::consume() for (auto &it : m_channelData[0]->scales) { int fftSize = it.first; + if (isShortWindowed() && + fftSize == m_guideConfiguration.longestFftSize) { + continue; + } for (int c = 0; c < channels; ++c) { auto &cd = m_channelData.at(c); auto &scale = cd->scales.at(fftSize); @@ -898,7 +938,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) } // Finally window the longest scale - m_scaleData.at(longest)->analysisWindow.cut(buf); + if (!isShortWindowed()) { + m_scaleData.at(longest)->analysisWindow.cut(buf); + } // FFT shift, forward FFT, and carry out cartesian-polar // conversion for each FFT size. 
@@ -956,9 +998,12 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) if (fftSize == classify && haveValidReadahead) { continue; } + if (isShortWindowed() && fftSize == longest) { + continue; + } auto &scale = it.second; - + v_fftshift(scale->timeDomain.data(), fftSize); m_scaleData.at(fftSize)->fft.forward(scale->timeDomain.data(), @@ -1127,6 +1172,11 @@ R3Stretcher::adjustFormant(int c) for (auto &it : cd->scales) { int fftSize = it.first; + if (isShortWindowed() && + fftSize == m_guideConfiguration.longestFftSize) { + continue; + } + auto &scale = it.second; int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate)); @@ -1156,6 +1206,8 @@ R3Stretcher::adjustFormant(int c) void R3Stretcher::adjustPreKick(int c) { + if (isShortWindowed()) return; + Profiler profiler("R3Stretcher::adjustPreKick"); auto &cd = m_channelData.at(c); @@ -1197,6 +1249,11 @@ R3Stretcher::synthesiseChannel(int c, int outhop, bool draining) for (const auto &band : cd->guidance.fftBands) { int fftSize = band.fftSize; + + if (isShortWindowed() && fftSize == longest) { + continue; + } + auto &scale = cd->scales.at(fftSize); auto &scaleData = m_scaleData.at(fftSize); diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index a5c537d..b781eb9 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -253,19 +253,22 @@ protected: struct ScaleData { int fftSize; + bool shortWindowMode; FFT fft; Window analysisWindow; Window synthesisWindow; process_t windowScaleFactor; GuidedPhaseAdvance guided; + ScaleData(GuidedPhaseAdvance::Parameters guidedParameters, Log log) : fftSize(guidedParameters.fftSize), + shortWindowMode(guidedParameters.shortWindowMode), fft(fftSize), - analysisWindow(analysisWindowShape(fftSize), - analysisWindowLength(fftSize)), - synthesisWindow(synthesisWindowShape(fftSize), - synthesisWindowLength(fftSize)), + analysisWindow(analysisWindowShape(), + analysisWindowLength()), + synthesisWindow(synthesisWindowShape(), + 
synthesisWindowLength()), windowScaleFactor(0.0), guided(guidedParameters, log) { @@ -277,10 +280,10 @@ protected: } } - WindowType analysisWindowShape(int fftSize); - int analysisWindowLength(int fftSize); - WindowType synthesisWindowShape(int fftSize); - int synthesisWindowLength(int fftSize); + WindowType analysisWindowShape(); + int analysisWindowLength(); + WindowType synthesisWindowShape(); + int synthesisWindowLength(); }; Parameters m_parameters; @@ -367,6 +370,11 @@ protected: return m_parameters.options & RubberBandStretcher::OptionProcessRealTime; } + + bool isShortWindowed() const { + return m_parameters.options & + RubberBandStretcher::OptionWindowShort; + } }; } From 279c44477e67cb51a383c8ab903e2d96b0af2f2b Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 14 Jul 2022 12:07:43 +0100 Subject: [PATCH 04/41] Fix unity handling in short-window mode --- src/finer/Guide.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/finer/Guide.h b/src/finer/Guide.h index 9e5f5b2..07c75a0 100644 --- a/src/finer/Guide.h +++ b/src/finer/Guide.h @@ -436,6 +436,14 @@ protected: guidance.fftBands[2].f0 = m_minHigher; guidance.fftBands[2].f1 = nyquist; + if (m_parameters.shortWindowMode) { + guidance.fftBands[0].f1 = 0.0; + guidance.fftBands[1].f0 = 0.0; + guidance.fftBands[1].f1 = nyquist; + guidance.fftBands[2].f0 = nyquist; + guidance.fftBands[2].f1 = nyquist; + } + guidance.phaseReset.present = true; if (!hadPhaseReset) { From e018458736dc5729d3ee0ff13f49c41a7ab0286c Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 14 Jul 2022 13:44:22 +0100 Subject: [PATCH 05/41] Make this fully single-windowed rather than just short-windowed --- main/main.cpp | 6 +++- src/finer/Guide.h | 24 +++++++++------- src/finer/PhaseAdvance.h | 6 ++-- src/finer/R3Stretcher.cpp | 59 ++++++++++++++++++++------------------- src/finer/R3Stretcher.h | 6 ++-- 5 files changed, 55 insertions(+), 46 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index 4f004ae..fcb5920 
100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -418,7 +418,11 @@ int main(int argc, char **argv) if (!quiet) { if (finer) { - cerr << "Using R3 (finer) engine" << endl; + if (shortwin) { + cerr << "Using intermediate R3 (finer) single-windowed engine" << endl; + } else { + cerr << "Using R3 (finer) engine" << endl; + } } else { cerr << "Using R2 (faster) engine" << endl; cerr << "Using crispness level: " << crispness << " ("; diff --git a/src/finer/Guide.h b/src/finer/Guide.h index 07c75a0..14d41bf 100644 --- a/src/finer/Guide.h +++ b/src/finer/Guide.h @@ -105,10 +105,10 @@ public: struct Parameters { double sampleRate; - bool shortWindowMode; - Parameters(double _sampleRate, bool _shortWindow) : + bool singleWindowMode; + Parameters(double _sampleRate, bool _singleWindow) : sampleRate(_sampleRate), - shortWindowMode(_shortWindow) { } + singleWindowMode(_singleWindow) { } }; Guide(Parameters parameters, Log log) : @@ -123,13 +123,16 @@ public: { double rate = m_parameters.sampleRate; - m_log.log(1, "Guide: rate and short-window mode", - rate, m_parameters.shortWindowMode); + m_log.log(1, "Guide: rate and single-window mode", + rate, m_parameters.singleWindowMode); - if (m_parameters.shortWindowMode) { - m_minLower = 0.0; + if (m_parameters.singleWindowMode) { m_defaultLower = 0.0; - m_maxLower = 0.0; + m_defaultHigher = parameters.sampleRate / 2.0; + m_minLower = m_defaultLower; + m_maxLower = m_defaultLower; + m_minHigher = m_defaultHigher; + m_maxHigher = m_defaultHigher; } int bandFftSize = roundUp(int(ceil(rate/16.0))); @@ -312,9 +315,10 @@ public: guidance.phaseLockBands[3].f0 = higher; guidance.phaseLockBands[3].f1 = nyquist; - if (m_parameters.shortWindowMode) { + if (m_parameters.singleWindowMode) { guidance.phaseLockBands[1].p = 1; guidance.phaseLockBands[2].p = 2; + guidance.phaseLockBands[3].p = 5; } if (outhop > 256) { @@ -436,7 +440,7 @@ protected: guidance.fftBands[2].f0 = m_minHigher; guidance.fftBands[2].f1 = nyquist; - if 
(m_parameters.shortWindowMode) { + if (m_parameters.singleWindowMode) { guidance.fftBands[0].f1 = 0.0; guidance.fftBands[1].f0 = 0.0; guidance.fftBands[1].f1 = nyquist; diff --git a/src/finer/PhaseAdvance.h b/src/finer/PhaseAdvance.h index 4474c9e..6015a22 100644 --- a/src/finer/PhaseAdvance.h +++ b/src/finer/PhaseAdvance.h @@ -43,11 +43,11 @@ public: int fftSize; double sampleRate; int channels; - bool shortWindowMode; + bool singleWindowMode; Parameters(int _fftSize, double _sampleRate, int _channels, - bool _shortWindow) : + bool _singleWindow) : fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels), - shortWindowMode(_shortWindow) { } + singleWindowMode(_singleWindow) { } }; GuidedPhaseAdvance(Parameters parameters, Log log) : diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index c65fb1a..49bde17 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -71,7 +71,7 @@ R3Stretcher::R3Stretcher(Parameters parameters, m_log.log(1, "R3Stretcher::R3Stretcher: offline mode"); } - if (isShortWindowed()) { + if (isSingleWindowed()) { m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested"); } @@ -112,7 +112,7 @@ R3Stretcher::R3Stretcher(Parameters parameters, int fftSize = band.fftSize; GuidedPhaseAdvance::Parameters guidedParameters (fftSize, m_parameters.sampleRate, m_parameters.channels, - isShortWindowed()); + isSingleWindowed()); m_scaleData[fftSize] = std::make_shared (guidedParameters, m_log); } @@ -146,9 +146,8 @@ R3Stretcher::R3Stretcher(Parameters parameters, WindowType R3Stretcher::ScaleData::analysisWindowShape() { - if (shortWindowMode) { - if (fftSize >= 2048) return HannWindow; - else return NiemitaloForwardWindow; + if (singleWindowMode) { + return HannWindow; } else { if (fftSize > 2048) return HannWindow; else return NiemitaloForwardWindow; @@ -164,9 +163,8 @@ R3Stretcher::ScaleData::analysisWindowLength() WindowType R3Stretcher::ScaleData::synthesisWindowShape() { - if 
(shortWindowMode) { - if (fftSize >= 2048) return HannWindow; - else return NiemitaloReverseWindow; + if (singleWindowMode) { + return HannWindow; } else { if (fftSize > 2048) return HannWindow; else return NiemitaloReverseWindow; @@ -176,7 +174,7 @@ R3Stretcher::ScaleData::synthesisWindowShape() int R3Stretcher::ScaleData::synthesisWindowLength() { - if (shortWindowMode) { + if (singleWindowMode) { return fftSize; } else { if (fftSize > 2048) return fftSize/2; @@ -320,12 +318,12 @@ R3Stretcher::calculateHop() if (proposedOuthop > 512.0) proposedOuthop = 512.0; if (proposedOuthop < 128.0) proposedOuthop = 128.0; - if (isShortWindowed()) { - // perhaps ironically, the short window mode actually uses a - // longer synthesis window for the 2048-bin FFT and, since - // reduced CPU consumption is the motivation, it can generally - // survive longer hops - proposedOuthop *= 1.5; + if (isSingleWindowed()) { + // the single (shorter) window mode actually uses a longer + // synthesis window for the 2048-bin FFT and drops the + // 1024-bin one, so it can survive longer hops, which is good + // because reduced CPU consumption is the whole motivation + proposedOuthop *= 2.0; } m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop); @@ -764,8 +762,8 @@ R3Stretcher::consume() for (auto &it : m_channelData[0]->scales) { int fftSize = it.first; - if (isShortWindowed() && - fftSize == m_guideConfiguration.longestFftSize) { + if (isSingleWindowed() && + fftSize != m_guideConfiguration.classificationFftSize) { continue; } for (int c = 0; c < channels; ++c) { @@ -905,12 +903,14 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) // the longest. 
(In practice this means we are probably only // populating one scale) - for (auto &it: cd->scales) { - int fftSize = it.first; - if (fftSize == classify || fftSize == longest) continue; - int offset = (longest - fftSize) / 2; - m_scaleData.at(fftSize)->analysisWindow.cut - (buf + offset, it.second->timeDomain.data()); + if (!isSingleWindowed()) { + for (auto &it: cd->scales) { + int fftSize = it.first; + if (fftSize == classify || fftSize == longest) continue; + int offset = (longest - fftSize) / 2; + m_scaleData.at(fftSize)->analysisWindow.cut + (buf + offset, it.second->timeDomain.data()); + } } // The classification scale has a one-hop readahead, so populate @@ -938,7 +938,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) } // Finally window the longest scale - if (!isShortWindowed()) { + if (!isSingleWindowed()) { m_scaleData.at(longest)->analysisWindow.cut(buf); } @@ -998,7 +998,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) if (fftSize == classify && haveValidReadahead) { continue; } - if (isShortWindowed() && fftSize == longest) { + if (isSingleWindowed() && fftSize != classify) { continue; } @@ -1172,8 +1172,8 @@ R3Stretcher::adjustFormant(int c) for (auto &it : cd->scales) { int fftSize = it.first; - if (isShortWindowed() && - fftSize == m_guideConfiguration.longestFftSize) { + if (isSingleWindowed() && + fftSize != m_guideConfiguration.classificationFftSize) { continue; } @@ -1206,7 +1206,7 @@ R3Stretcher::adjustFormant(int c) void R3Stretcher::adjustPreKick(int c) { - if (isShortWindowed()) return; + if (isSingleWindowed()) return; Profiler profiler("R3Stretcher::adjustPreKick"); @@ -1250,7 +1250,8 @@ R3Stretcher::synthesiseChannel(int c, int outhop, bool draining) for (const auto &band : cd->guidance.fftBands) { int fftSize = band.fftSize; - if (isShortWindowed() && fftSize == longest) { + if (isSingleWindowed() && + fftSize != m_guideConfiguration.classificationFftSize) { continue; } 
diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index b781eb9..1ca6367 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -253,7 +253,7 @@ protected: struct ScaleData { int fftSize; - bool shortWindowMode; + bool singleWindowMode; FFT fft; Window analysisWindow; Window synthesisWindow; @@ -263,7 +263,7 @@ protected: ScaleData(GuidedPhaseAdvance::Parameters guidedParameters, Log log) : fftSize(guidedParameters.fftSize), - shortWindowMode(guidedParameters.shortWindowMode), + singleWindowMode(guidedParameters.singleWindowMode), fft(fftSize), analysisWindow(analysisWindowShape(), analysisWindowLength()), @@ -371,7 +371,7 @@ protected: RubberBandStretcher::OptionProcessRealTime; } - bool isShortWindowed() const { + bool isSingleWindowed() const { return m_parameters.options & RubberBandStretcher::OptionWindowShort; } From d0cca91e305be7c6b9868140b38c03b497f538fc Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 14 Jul 2022 13:52:04 +0100 Subject: [PATCH 06/41] Impose a limit here --- src/finer/R3Stretcher.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 49bde17..5d7e20d 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -324,6 +324,7 @@ R3Stretcher::calculateHop() // 1024-bin one, so it can survive longer hops, which is good // because reduced CPU consumption is the whole motivation proposedOuthop *= 2.0; + if (proposedOuthop > 640.0) proposedOuthop = 640.0; } m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop); From fbcd385c321370b71a7e19c51b2d79c7d88ff199 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 14 Jul 2022 14:15:10 +0100 Subject: [PATCH 07/41] We're working toward 3.1.0 here --- meson.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meson.build b/meson.build index 42f7f7f..54951a2 100644 --- a/meson.build +++ b/meson.build @@ -2,7 +2,7 @@ project( 'Rubber Band Library', 'c', 
'cpp', - version: '3.0.0', + version: '3.1.0-pre', license: 'GPL-2.0-or-later', default_options: [ 'cpp_std=c++11', @@ -15,7 +15,7 @@ project( meson_version: '>= 0.53.0' ) -rubberband_dynamic_library_version = '2.2.0' +rubberband_dynamic_library_version = '2.2.1' system = host_machine.system() architecture = host_machine.cpu_family() From 511645fed29b8ee7501e38a9a4b391ae8d896150 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 14 Jul 2022 14:16:16 +0100 Subject: [PATCH 08/41] Further version numbers --- rubberband/RubberBandStretcher.h | 2 +- rubberband/rubberband-c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rubberband/RubberBandStretcher.h b/rubberband/RubberBandStretcher.h index 10e4351..85bb6a6 100644 --- a/rubberband/RubberBandStretcher.h +++ b/rubberband/RubberBandStretcher.h @@ -24,7 +24,7 @@ #ifndef RUBBERBAND_STRETCHER_H #define RUBBERBAND_STRETCHER_H -#define RUBBERBAND_VERSION "3.0.0" +#define RUBBERBAND_VERSION "3.1.0" #define RUBBERBAND_API_MAJOR_VERSION 2 #define RUBBERBAND_API_MINOR_VERSION 7 diff --git a/rubberband/rubberband-c.h b/rubberband/rubberband-c.h index 54433a5..fcae64c 100644 --- a/rubberband/rubberband-c.h +++ b/rubberband/rubberband-c.h @@ -28,7 +28,7 @@ extern "C" { #endif -#define RUBBERBAND_VERSION "3.0.0" +#define RUBBERBAND_VERSION "3.1.0" #define RUBBERBAND_API_MAJOR_VERSION 2 #define RUBBERBAND_API_MINOR_VERSION 7 From d21112170180849d6d7acfbd28b5bc22f601fa27 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 14 Jul 2022 17:49:22 +0100 Subject: [PATCH 09/41] Ah, this was still in use in Scavenger. 
It can stay for now --- src/common/sysutils.cpp | 16 ++++++++++++++++ src/common/sysutils.h | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/src/common/sysutils.cpp b/src/common/sysutils.cpp index 0a13b7e..6f36ae0 100644 --- a/src/common/sysutils.cpp +++ b/src/common/sysutils.cpp @@ -150,6 +150,22 @@ system_is_multiprocessor() return mp; } +#ifdef _WIN32 + +void gettimeofday(struct timeval *tv, void *tz) +{ + union { + long long ns100; + FILETIME ft; + } now; + + ::GetSystemTimeAsFileTime(&now.ft); + tv->tv_usec = (long)((now.ns100 / 10LL) % 1000000LL); + tv->tv_sec = (long)((now.ns100 - 116444736000000000LL) / 10000000LL); +} + +#endif + void system_specific_initialise() { #if defined HAVE_IPP diff --git a/src/common/sysutils.h b/src/common/sysutils.h index c59e1bf..9bcdd9a 100644 --- a/src/common/sysutils.h +++ b/src/common/sysutils.h @@ -99,6 +99,11 @@ extern bool system_is_multiprocessor(); extern void system_specific_initialise(); extern void system_specific_application_initialise(); +#ifdef _WIN32 +struct timeval { long tv_sec; long tv_usec; }; +void gettimeofday(struct timeval *p, void *tz); +#endif // _WIN32 + } // end namespace // The following should be functions in the RubberBand namespace, really From 9fff2836c67470f4a04e5b8865d944743acd86cc Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 2 Aug 2022 16:22:24 +0100 Subject: [PATCH 10/41] Avoid RatioMostlyFixed in RealTime mode, it can lead to unpleasant surprises --- src/finer/R3Stretcher.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 5d7e20d..2b3fb4c 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -276,14 +276,12 @@ R3Stretcher::createResampler() resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; if (isRealTime()) { - if (m_parameters.options & - RubberBandStretcher::OptionPitchHighConsistency) { - resamplerParameters.dynamism = 
Resampler::RatioOftenChanging; - resamplerParameters.ratioChange = Resampler::SmoothRatioChange; - } else { - resamplerParameters.dynamism = Resampler::RatioMostlyFixed; - resamplerParameters.ratioChange = Resampler::SmoothRatioChange; - } + // If we knew the caller would never change ratio, we could + // supply RatioMostlyFixed - but it can have such overhead + // when the ratio *does* change that a single call would kill + // RT use, so it's not a good idea + resamplerParameters.dynamism = Resampler::RatioOftenChanging; + resamplerParameters.ratioChange = Resampler::SmoothRatioChange; } else { resamplerParameters.dynamism = Resampler::RatioMostlyFixed; resamplerParameters.ratioChange = Resampler::SuddenRatioChange; From 2fa0e1162ed5f4fd7a6cfe4709106dda96e548a2 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Wed, 3 Aug 2022 14:16:17 +0100 Subject: [PATCH 11/41] Toward properly expressing the window sizes in Guide when in single-window mode, so as to separate the time-domain frame length from the longest FFT within R3Stretcher and allow us to use a shorter time-domain frame. Not working correctly in single-window mode yet. 
--- src/finer/Guide.h | 327 +++++++++++++++++++++++--------------- src/finer/R3Stretcher.cpp | 123 +++++++------- src/finer/R3Stretcher.h | 9 +- 3 files changed, 264 insertions(+), 195 deletions(-) diff --git a/src/finer/Guide.h b/src/finer/Guide.h index 14d41bf..78f685a 100644 --- a/src/finer/Guide.h +++ b/src/finer/Guide.h @@ -69,7 +69,9 @@ public: struct Guidance { FftBand fftBands[3]; + int fftBandCount; PhaseLockBand phaseLockBands[4]; + int phaseLockBandCount; Range kick; Range preKick; Range highUnlocked; @@ -96,11 +98,10 @@ public: int shortestFftSize; int classificationFftSize; BandLimits fftBandLimits[3]; - Configuration(int _longestFftSize, int _shortestFftSize, - int _classificationFftSize) : - longestFftSize(_longestFftSize), - shortestFftSize(_shortestFftSize), - classificationFftSize(_classificationFftSize) { } + int fftBandLimitCount; + Configuration() : + longestFftSize(0), shortestFftSize(0), classificationFftSize(0), + fftBandLimitCount(0) { } }; struct Parameters { @@ -113,46 +114,76 @@ public: Guide(Parameters parameters, Log log) : m_parameters(parameters), - m_log(log), - m_configuration(roundUp(int(ceil(parameters.sampleRate / 16.0))), - roundUp(int(ceil(parameters.sampleRate / 64.0))), - roundUp(int(ceil(parameters.sampleRate / 32.0)))), - m_minLower(500.0), m_minHigher(4000.0), - m_defaultLower(700.0), m_defaultHigher(4800.0), - m_maxLower(1100.0), m_maxHigher(7000.0) + m_log(log) { double rate = m_parameters.sampleRate; + double nyquist = rate / 2.0; m_log.log(1, "Guide: rate and single-window mode", rate, m_parameters.singleWindowMode); - if (m_parameters.singleWindowMode) { - m_defaultLower = 0.0; - m_defaultHigher = parameters.sampleRate / 2.0; - m_minLower = m_defaultLower; - m_maxLower = m_defaultLower; - m_minHigher = m_defaultHigher; - m_maxHigher = m_defaultHigher; - } + int classificationFftSize = + roundUp(int(ceil(parameters.sampleRate / 32.0))); - int bandFftSize = roundUp(int(ceil(rate/16.0))); - 
m_configuration.fftBandLimits[0] = - BandLimits(bandFftSize, rate, 0.0, m_maxLower); + m_configuration.classificationFftSize = classificationFftSize; - // This is the classification and fallback FFT: we need it to - // go up to Nyquist so we can seamlessly switch to it for - // longer stretches, and down to 0.0 so we can use it for - // unity in offline mode - bandFftSize = roundUp(int(ceil(rate/32.0))); - m_configuration.fftBandLimits[1] = - BandLimits(bandFftSize, rate, 0.0, rate / 2.0); - - bandFftSize = roundUp(int(ceil(rate/64.0))); - m_configuration.fftBandLimits[2] = - BandLimits(bandFftSize, rate, m_minHigher, rate/2.0); - m_log.log(1, "Guide: classification FFT size", m_configuration.classificationFftSize); + + if (m_parameters.singleWindowMode) { + + // Single-window mode + + m_configuration.longestFftSize = classificationFftSize; + m_configuration.shortestFftSize = classificationFftSize; + + m_defaultLower = nyquist; + m_minLower = m_defaultLower; + m_maxLower = m_defaultLower; + + m_defaultHigher = nyquist; + m_minHigher = m_defaultHigher; + m_maxHigher = m_defaultHigher; + + m_configuration.fftBandLimitCount = 1; + + m_configuration.fftBandLimits[0] = + BandLimits(classificationFftSize, rate, 0.0, nyquist); + + } else { + + // The normal multi-window mode + + m_configuration.longestFftSize = classificationFftSize * 2; + m_configuration.shortestFftSize = classificationFftSize / 2; + + m_defaultLower = 700.0; + m_minLower = 500.0; + m_maxLower = 1100.0; + + m_defaultHigher = 4800.0; + m_minHigher = 4000.0; + m_maxHigher = 7000.0; + + m_configuration.fftBandLimitCount = 3; + + m_configuration.fftBandLimits[0] = + BandLimits(m_configuration.longestFftSize, + rate, 0.0, m_maxLower); + + // This is the classification and fallback FFT: we need it + // to go up to Nyquist so we can seamlessly switch to it + // for longer stretches, and down to 0.0 so we can use it + // for unity in offline mode + + m_configuration.fftBandLimits[1] = + 
BandLimits(classificationFftSize, + rate, 0.0, nyquist); + + m_configuration.fftBandLimits[2] = + BandLimits(m_configuration.shortestFftSize, + rate, m_minHigher, nyquist); + } } const Configuration &getConfiguration() const { @@ -184,13 +215,58 @@ public: guidance.channelLock.present = false; double nyquist = m_parameters.sampleRate / 2.0; - guidance.fftBands[0].fftSize = roundUp(int(ceil(nyquist/8.0))); - guidance.fftBands[1].fftSize = roundUp(int(ceil(nyquist/16.0))); - guidance.fftBands[2].fftSize = roundUp(int(ceil(nyquist/32.0))); - // This is a vital stop case for PhaseAdvance - guidance.phaseLockBands[3].f1 = nyquist; + if (m_parameters.singleWindowMode) { + // All the fft and phase-lock bands are fixed in this + // mode. We'll still need to continue to set up phase + // reset ranges etc, including the unity case. + + guidance.fftBandCount = 1; + guidance.fftBands[0].fftSize = m_configuration.classificationFftSize; + guidance.fftBands[0].f0 = 0.0; + guidance.fftBands[0].f1 = nyquist; + + guidance.phaseLockBandCount = 3; + + guidance.phaseLockBands[0].p = 1; + guidance.phaseLockBands[0].beta = betaFor(1200.0, ratio); + guidance.phaseLockBands[0].f0 = 0.0; + guidance.phaseLockBands[0].f1 = 1600.0; + + guidance.phaseLockBands[1].p = 2; + guidance.phaseLockBands[1].beta = betaFor(4800.0, ratio); + guidance.phaseLockBands[1].f0 = 1600.0; + guidance.phaseLockBands[1].f1 = 7000.0; + + guidance.phaseLockBands[2].p = 5; + guidance.phaseLockBands[2].beta = betaFor(10000.0, ratio); + guidance.phaseLockBands[2].f0 = 7000.0; + guidance.phaseLockBands[2].f1 = nyquist; + + if (outhop > 256) { + guidance.phaseLockBands[2].p = 4; + } + + } else { + + // The normal multi-window mode + + guidance.fftBandCount = 3; + guidance.fftBands[0].fftSize = m_configuration.longestFftSize; + guidance.fftBands[1].fftSize = m_configuration.classificationFftSize; + guidance.fftBands[2].fftSize = m_configuration.shortestFftSize; + + guidance.phaseLockBandCount = 4; + + // This is a vital 
stop case for PhaseAdvance + guidance.phaseLockBands[3].f1 = nyquist; + } + + // We've set the counts, and for single-window mode we've set + // the band ranges as well - in normal multi-window mode we + // still have to do that, but we should do these first + if (meanMagnitude < 1.0e-6) { updateForSilence(guidance); return; @@ -199,8 +275,6 @@ public: if (unityCount > 0) { updateForUnity(guidance, hadPhaseReset, - unityCount, - magnitudes, segmentation, realtime); return; @@ -265,66 +339,64 @@ public: } } - double prevLower = guidance.fftBands[0].f1; - double lower = descendToValley(prevLower, magnitudes); - if (lower > m_maxLower || lower < m_minLower) { - lower = m_defaultLower; + if (!m_parameters.singleWindowMode) { + + // The normal multi-window mode. For single-window we did + // this already. + + double prevLower = guidance.fftBands[0].f1; + double lower = descendToValley(prevLower, magnitudes); + if (lower > m_maxLower || lower < m_minLower) { + lower = m_defaultLower; + } + + double prevHigher = guidance.fftBands[1].f1; + double higher = descendToValley(prevHigher, magnitudes); + if (higher > m_maxHigher || higher < m_minHigher) { + higher = m_defaultHigher; + } + + guidance.fftBands[0].f0 = 0.0; + guidance.fftBands[0].f1 = lower; + + guidance.fftBands[1].f0 = lower; + guidance.fftBands[1].f1 = higher; + + guidance.fftBands[2].f0 = higher; + guidance.fftBands[2].f1 = nyquist; + + if (outhop > 256) { + guidance.fftBands[1].f1 = nyquist; + guidance.fftBands[2].f0 = nyquist; + } + + double mid = std::max(lower, 1600.0); + + guidance.phaseLockBands[0].p = 1; + guidance.phaseLockBands[0].beta = betaFor(300.0, ratio); + guidance.phaseLockBands[0].f0 = 0.0; + guidance.phaseLockBands[0].f1 = lower; + + guidance.phaseLockBands[1].p = 2; + guidance.phaseLockBands[1].beta = betaFor(1600.0, ratio); + guidance.phaseLockBands[1].f0 = lower; + guidance.phaseLockBands[1].f1 = mid; + + guidance.phaseLockBands[2].p = 3; + guidance.phaseLockBands[2].beta = betaFor(4800.0, 
ratio); + guidance.phaseLockBands[2].f0 = mid; + guidance.phaseLockBands[2].f1 = higher; + + guidance.phaseLockBands[3].p = 4; + guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio); + guidance.phaseLockBands[3].f0 = higher; + guidance.phaseLockBands[3].f1 = nyquist; + + if (outhop > 256) { + guidance.phaseLockBands[3].p = 3; + } } - double prevHigher = guidance.fftBands[1].f1; - double higher = descendToValley(prevHigher, magnitudes); - if (higher > m_maxHigher || higher < m_minHigher) { - higher = m_defaultHigher; - } - - guidance.fftBands[0].f0 = 0.0; - guidance.fftBands[0].f1 = lower; - -// std::cout << "x:" << lower << std::endl; - - guidance.fftBands[1].f0 = lower; - guidance.fftBands[1].f1 = higher; - - guidance.fftBands[2].f0 = higher; - guidance.fftBands[2].f1 = nyquist; - - if (outhop > 256) { - guidance.fftBands[1].f1 = nyquist; - guidance.fftBands[2].f0 = nyquist; - } - - double mid = std::max(lower, 1600.0); - - guidance.phaseLockBands[0].p = 1; - guidance.phaseLockBands[0].beta = betaFor(300.0, ratio); - guidance.phaseLockBands[0].f0 = 0.0; - guidance.phaseLockBands[0].f1 = lower; - - guidance.phaseLockBands[1].p = 2; - guidance.phaseLockBands[1].beta = betaFor(1600.0, ratio); - guidance.phaseLockBands[1].f0 = lower; - guidance.phaseLockBands[1].f1 = mid; - - guidance.phaseLockBands[2].p = 3; - guidance.phaseLockBands[2].beta = betaFor(5000.0, ratio); - guidance.phaseLockBands[2].f0 = mid; - guidance.phaseLockBands[2].f1 = higher; - - guidance.phaseLockBands[3].p = 4; - guidance.phaseLockBands[3].beta = betaFor(10000.0, ratio); - guidance.phaseLockBands[3].f0 = higher; - guidance.phaseLockBands[3].f1 = nyquist; - - if (m_parameters.singleWindowMode) { - guidance.phaseLockBands[1].p = 1; - guidance.phaseLockBands[2].p = 2; - guidance.phaseLockBands[3].p = 5; - } - - if (outhop > 256) { - guidance.phaseLockBands[3].p = 3; - } - if (ratio > 2.0) { // For very long stretches, diffuse is better than @@ -348,7 +420,7 @@ public: 
guidance.highUnlocked.present = true; } - /* +/* std::ostringstream str; str << "Guidance: FFT bands: [" << guidance.fftBands[0].fftSize << " from " @@ -363,8 +435,9 @@ public: << guidance.phaseReset.present << " from " << guidance.phaseReset.f0 << " to " << guidance.phaseReset.f1 << "]" << std::endl; - m_parameters.logger(str.str()); - */ + + m_log.log(1, str.str().c_str()); +*/ } void setDebugLevel(int level) { @@ -396,12 +469,14 @@ protected: void updateForSilence(Guidance &guidance) const { // std::cout << "phase reset on silence" << std::endl; double nyquist = m_parameters.sampleRate / 2.0; - guidance.fftBands[0].f0 = 0.0; - guidance.fftBands[0].f1 = 0.0; - guidance.fftBands[1].f0 = 0.0; - guidance.fftBands[1].f1 = nyquist; - guidance.fftBands[2].f0 = nyquist; - guidance.fftBands[2].f1 = nyquist; + if (!m_parameters.singleWindowMode) { + guidance.fftBands[0].f0 = 0.0; + guidance.fftBands[0].f1 = 0.0; + guidance.fftBands[1].f0 = 0.0; + guidance.fftBands[1].f1 = nyquist; + guidance.fftBands[2].f0 = nyquist; + guidance.fftBands[2].f1 = nyquist; + } guidance.phaseReset.present = true; guidance.phaseReset.f0 = 0.0; guidance.phaseReset.f1 = nyquist; @@ -409,8 +484,6 @@ protected: void updateForUnity(Guidance &guidance, bool hadPhaseReset, - uint32_t /* unityCount */, - const process_t *const /* magnitudes */, const BinSegmenter::Segmentation &segmentation, bool realtime) const { @@ -421,30 +494,26 @@ protected: if (!realtime) { // ratio can't change, so we are just running 1.0 ratio // throughout - guidance.fftBands[0].f0 = 0.0; - guidance.fftBands[0].f1 = 0.0; - guidance.fftBands[1].f0 = 0.0; - guidance.fftBands[1].f1 = nyquist; - guidance.fftBands[2].f0 = nyquist; - guidance.fftBands[2].f1 = nyquist; + if (!m_parameters.singleWindowMode) { + guidance.fftBands[0].f0 = 0.0; + guidance.fftBands[0].f1 = 0.0; + guidance.fftBands[1].f0 = 0.0; + guidance.fftBands[1].f1 = nyquist; + guidance.fftBands[2].f0 = nyquist; + guidance.fftBands[2].f1 = nyquist; + } 
guidance.phaseReset.present = true; guidance.phaseReset.f0 = 0.0; guidance.phaseReset.f1 = nyquist; return; } - guidance.fftBands[0].f0 = 0.0; - guidance.fftBands[0].f1 = m_minLower; - guidance.fftBands[1].f0 = m_minLower; - guidance.fftBands[1].f1 = m_minHigher; - guidance.fftBands[2].f0 = m_minHigher; - guidance.fftBands[2].f1 = nyquist; - - if (m_parameters.singleWindowMode) { - guidance.fftBands[0].f1 = 0.0; - guidance.fftBands[1].f0 = 0.0; - guidance.fftBands[1].f1 = nyquist; - guidance.fftBands[2].f0 = nyquist; + if (!m_parameters.singleWindowMode) { + guidance.fftBands[0].f0 = 0.0; + guidance.fftBands[0].f1 = m_minLower; + guidance.fftBands[1].f0 = m_minLower; + guidance.fftBands[1].f1 = m_minHigher; + guidance.fftBands[2].f0 = m_minHigher; guidance.fftBands[2].f1 = nyquist; } diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 2b3fb4c..bc24b44 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -74,6 +74,14 @@ R3Stretcher::R3Stretcher(Parameters parameters, if (isSingleWindowed()) { m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested"); } + + if (m_guideConfiguration.longestFftSize > + m_guideConfiguration.classificationFftSize) { + m_timeDomainFrameLength = m_guideConfiguration.longestFftSize; + } else { + m_timeDomainFrameLength = + (m_guideConfiguration.classificationFftSize * 3) / 2; + } double maxClassifierFrequency = 16000.0; if (maxClassifierFrequency > m_parameters.sampleRate/2) { @@ -90,25 +98,27 @@ R3Stretcher::R3Stretcher(Parameters parameters, BinClassifier::Parameters classifierParameters (classificationBins, 9, 1, 10, 2.0, 2.0); - int inRingBufferSize = m_guideConfiguration.longestFftSize * 2; - int outRingBufferSize = m_guideConfiguration.longestFftSize * 16; + int inRingBufferSize = m_timeDomainFrameLength * 2; + int outRingBufferSize = m_timeDomainFrameLength * 16; for (int c = 0; c < m_parameters.channels; ++c) { m_channelData.push_back(std::make_shared 
(segmenterParameters, classifierParameters, - m_guideConfiguration.longestFftSize, + m_timeDomainFrameLength, inRingBufferSize, outRingBufferSize)); - for (auto band: m_guideConfiguration.fftBandLimits) { + for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) { + const auto &band = m_guideConfiguration.fftBandLimits[b]; int fftSize = band.fftSize; m_channelData[c]->scales[fftSize] = std::make_shared - (fftSize, m_guideConfiguration.longestFftSize); + (fftSize, m_timeDomainFrameLength); } } - for (auto band: m_guideConfiguration.fftBandLimits) { + for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) { + const auto &band = m_guideConfiguration.fftBandLimits[b]; int fftSize = band.fftSize; GuidedPhaseAdvance::Parameters guidedParameters (fftSize, m_parameters.sampleRate, m_parameters.channels, @@ -273,7 +283,7 @@ R3Stretcher::createResampler() } resamplerParameters.initialSampleRate = m_parameters.sampleRate; - resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; + resamplerParameters.maxBufferSize = m_timeDomainFrameLength; if (isRealTime()) { // If we knew the caller would never change ratio, we could @@ -447,7 +457,7 @@ R3Stretcher::getPreferredStartPad() const if (!isRealTime()) { return 0; } else { - return m_guideConfiguration.longestFftSize / 2; + return m_timeDomainFrameLength / 2; } } @@ -458,7 +468,7 @@ R3Stretcher::getStartDelay() const return 0; } else { double factor = 0.5 / m_pitchScale; - return size_t(ceil(m_guideConfiguration.longestFftSize * factor)); + return size_t(ceil(m_timeDomainFrameLength * factor)); } } @@ -531,10 +541,9 @@ size_t R3Stretcher::getSamplesRequired() const { if (available() != 0) return 0; - int longest = m_guideConfiguration.longestFftSize; int rs = m_channelData[0]->inbuf->getReadSpace(); - if (rs < longest) { - return longest - rs; + if (rs < m_timeDomainFrameLength) { + return m_timeDomainFrameLength - rs; } else { return 0; } @@ -544,7 +553,7 @@ void 
R3Stretcher::setMaxProcessSize(size_t n) { size_t oldSize = m_channelData[0]->inbuf->getSize(); - size_t newSize = m_guideConfiguration.longestFftSize + n; + size_t newSize = m_timeDomainFrameLength + n; if (newSize > oldSize) { m_log.log(1, "setMaxProcessSize: resizing from and to", oldSize, newSize); @@ -599,11 +608,11 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) createResampler(); } - // Pad to half the longest frame. As with R2, in real-time - // mode we don't do this -- it's better to start with a - // swoosh than introduce more latency, and we don't want - // gaps when the ratio changes. - int pad = m_guideConfiguration.longestFftSize / 2; + // Pad to half the frame. As with R2, in real-time mode we + // don't do this -- it's better to start with a swoosh + // than introduce more latency, and we don't want gaps + // when the ratio changes. + int pad = m_timeDomainFrameLength / 2; m_log.log(1, "offline mode: prefilling with", pad); for (int c = 0; c < m_parameters.channels; ++c) { m_channelData[c]->inbuf->zero(pad); @@ -735,7 +744,7 @@ R3Stretcher::consume() // the map iterators int readSpace = cd0->inbuf->getReadSpace(); - if (readSpace < longest) { + if (readSpace < m_timeDomainFrameLength) { if (m_mode == ProcessMode::Finished) { if (readSpace == 0) { int fill = cd0->scales.at(longest)->accumulatorFill; @@ -761,10 +770,6 @@ R3Stretcher::consume() for (auto &it : m_channelData[0]->scales) { int fftSize = it.first; - if (isSingleWindowed() && - fftSize != m_guideConfiguration.classificationFftSize) { - continue; - } for (int c = 0; c < channels; ++c) { auto &cd = m_channelData.at(c); auto &scale = cd->scales.at(fftSize); @@ -887,6 +892,8 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) auto &cd = m_channelData.at(c); process_t *buf = cd->scales.at(longest)->timeDomain.data(); + //!!! 
review + int readSpace = cd->inbuf->getReadSpace(); if (readSpace < longest) { cd->inbuf->peek(buf, readSpace); @@ -900,16 +907,15 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) // it, windowing as we copy. The classification scale is handled // separately because it has readahead, so skip it here as well as // the longest. (In practice this means we are probably only - // populating one scale) + // populating one scale in multi-window mode, and none at all in + // single-window mode) - if (!isSingleWindowed()) { - for (auto &it: cd->scales) { - int fftSize = it.first; - if (fftSize == classify || fftSize == longest) continue; - int offset = (longest - fftSize) / 2; - m_scaleData.at(fftSize)->analysisWindow.cut - (buf + offset, it.second->timeDomain.data()); - } + for (auto &it: cd->scales) { + int fftSize = it.first; + if (fftSize == classify || fftSize == longest) continue; + int offset = (longest - fftSize) / 2; + m_scaleData.at(fftSize)->analysisWindow.cut + (buf + offset, it.second->timeDomain.data()); } // The classification scale has a one-hop readahead, so populate @@ -937,7 +943,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) } // Finally window the longest scale - if (!isSingleWindowed()) { + if (classify != longest) { m_scaleData.at(longest)->analysisWindow.cut(buf); } @@ -964,14 +970,14 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) classifyScale->real.data(), classifyScale->imag.data()); - for (const auto &b : m_guideConfiguration.fftBandLimits) { - if (b.fftSize == classify) { - + for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) { + const auto &band = m_guideConfiguration.fftBandLimits[b]; + if (band.fftSize == classify) { ToPolarSpec spec; spec.magFromBin = 0; spec.magBinCount = classify/2 + 1; - spec.polarFromBin = b.b0min; - spec.polarBinCount = b.b1max - b.b0min + 1; + spec.polarFromBin = band.b0min; + spec.polarBinCount = band.b1max 
- band.b0min + 1; convertToPolar(readahead.mag.data(), readahead.phase.data(), classifyScale->real.data(), @@ -997,9 +1003,6 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) if (fftSize == classify && haveValidReadahead) { continue; } - if (isSingleWindowed() && fftSize != classify) { - continue; - } auto &scale = it.second; @@ -1009,8 +1012,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) scale->real.data(), scale->imag.data()); - for (const auto &b : m_guideConfiguration.fftBandLimits) { - if (b.fftSize == fftSize) { + for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) { + const auto &band = m_guideConfiguration.fftBandLimits[b]; + if (band.fftSize == fftSize) { ToPolarSpec spec; @@ -1024,11 +1028,11 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) if (fftSize == classify) { spec.magFromBin = 0; spec.magBinCount = classify/2 + 1; - spec.polarFromBin = b.b0min; - spec.polarBinCount = b.b1max - b.b0min + 1; + spec.polarFromBin = band.b0min; + spec.polarBinCount = band.b1max - band.b0min + 1; } else { - spec.magFromBin = b.b0min; - spec.magBinCount = b.b1max - b.b0min + 1; + spec.magFromBin = band.b0min; + spec.magBinCount = band.b1max - band.b0min + 1; spec.polarFromBin = spec.magFromBin; spec.polarBinCount = spec.magBinCount; } @@ -1171,11 +1175,6 @@ R3Stretcher::adjustFormant(int c) for (auto &it : cd->scales) { int fftSize = it.first; - if (isSingleWindowed() && - fftSize != m_guideConfiguration.classificationFftSize) { - continue; - } - auto &scale = it.second; int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate)); @@ -1186,9 +1185,10 @@ R3Stretcher::adjustFormant(int c) process_t maxRatio = 60.0; process_t minRatio = 1.0 / maxRatio; - for (const auto &b : m_guideConfiguration.fftBandLimits) { - if (b.fftSize != fftSize) continue; - for (int i = b.b0min; i < b.b1max && i < highBin; ++i) { + for (int b = 0; b < 
m_guideConfiguration.fftBandLimitCount; ++b) { + const auto &band = m_guideConfiguration.fftBandLimits[b]; + if (band.fftSize != fftSize) continue; + for (int i = band.b0min; i < band.b1max && i < highBin; ++i) { process_t source = cd->formant->envelopeAt(i * sourceFactor); process_t target = cd->formant->envelopeAt(i * targetFactor); if (target > 0.0) { @@ -1205,6 +1205,8 @@ R3Stretcher::adjustFormant(int c) void R3Stretcher::adjustPreKick(int c) { + //!!! if we aren't going to do this, we should modify Guide so as + //!!! not to do the small additional work of checking for it if (isSingleWindowed()) return; Profiler profiler("R3Stretcher::adjustPreKick"); @@ -1245,14 +1247,11 @@ R3Stretcher::synthesiseChannel(int c, int outhop, bool draining) int longest = m_guideConfiguration.longestFftSize; auto &cd = m_channelData.at(c); - - for (const auto &band : cd->guidance.fftBands) { - int fftSize = band.fftSize; - if (isSingleWindowed() && - fftSize != m_guideConfiguration.classificationFftSize) { - continue; - } + for (int b = 0; b < cd->guidance.fftBandCount; ++b) { + + const auto &band = cd->guidance.fftBands[b]; + int fftSize = band.fftSize; auto &scale = cd->scales.at(fftSize); auto &scaleData = m_scaleData.at(fftSize); diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index 1ca6367..c97887a 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -132,7 +132,7 @@ protected: FixedVector accumulator; int accumulatorFill; - ChannelScaleData(int _fftSize, int _longestFftSize) : + ChannelScaleData(int _fftSize, int _timeDomainLength) : fftSize(_fftSize), bufSize(fftSize/2 + 1), timeDomain(fftSize, 0.f), @@ -143,7 +143,7 @@ protected: advancedPhase(bufSize, 0.f), prevMag(bufSize, 0.f), pendingKick(bufSize, 0.f), - accumulator(_longestFftSize, 0.f), + accumulator(_timeDomainLength, 0.f), accumulatorFill(0) { } @@ -202,7 +202,7 @@ protected: std::unique_ptr formant; ChannelData(BinSegmenter::Parameters segmenterParameters, 
BinClassifier::Parameters classifierParameters, - int longestFftSize, + int timeDomainLength, int inRingBufferSize, int outRingBufferSize) : scales(), @@ -215,7 +215,7 @@ protected: BinClassifier::Classification::Residual), segmenter(new BinSegmenter(segmenterParameters)), segmentation(), prevSegmentation(), nextSegmentation(), - mixdown(longestFftSize, 0.f), // though it could be shorter + mixdown(timeDomainLength, 0.f), resampled(outRingBufferSize, 0.f), inbuf(new RingBuffer(inRingBufferSize)), outbuf(new RingBuffer(outRingBufferSize)), @@ -297,6 +297,7 @@ protected: std::map> m_scaleData; Guide m_guide; Guide::Configuration m_guideConfiguration; + int m_timeDomainFrameLength; ChannelAssembly m_channelAssembly; std::unique_ptr m_calculator; std::unique_ptr m_resampler; From d1386b0a0c48e64aed4fb2a278c1cfc362e56e7a Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 4 Aug 2022 10:02:09 +0100 Subject: [PATCH 12/41] Provide a separate window source buffer to simplify single/multi window logic --- src/finer/R3Stretcher.cpp | 33 +++++++++++++-------------------- src/finer/R3Stretcher.h | 19 ++++++++++++++----- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index bc24b44..2ee86ca 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -44,6 +44,7 @@ R3Stretcher::R3Stretcher(Parameters parameters, m_parameters.options & RubberBandStretcher::OptionWindowShort), m_log), m_guideConfiguration(m_guide.getConfiguration()), + m_windowSourceBuffer(getWindowSourceBufferLength()), m_channelAssembly(m_parameters.channels), m_inhop(1), m_prevInhop(1), @@ -74,14 +75,6 @@ R3Stretcher::R3Stretcher(Parameters parameters, if (isSingleWindowed()) { m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested"); } - - if (m_guideConfiguration.longestFftSize > - m_guideConfiguration.classificationFftSize) { - m_timeDomainFrameLength = m_guideConfiguration.longestFftSize; - } 
else { - m_timeDomainFrameLength = - (m_guideConfiguration.classificationFftSize * 3) / 2; - } double maxClassifierFrequency = 16000.0; if (maxClassifierFrequency > m_parameters.sampleRate/2) { @@ -98,14 +91,14 @@ R3Stretcher::R3Stretcher(Parameters parameters, BinClassifier::Parameters classifierParameters (classificationBins, 9, 1, 10, 2.0, 2.0); - int inRingBufferSize = m_timeDomainFrameLength * 2; - int outRingBufferSize = m_timeDomainFrameLength * 16; + int inRingBufferSize = getWindowSourceBufferLength() * 2; + int outRingBufferSize = getWindowSourceBufferLength() * 16; for (int c = 0; c < m_parameters.channels; ++c) { m_channelData.push_back(std::make_shared (segmenterParameters, classifierParameters, - m_timeDomainFrameLength, + m_guideConfiguration.longestFftSize, inRingBufferSize, outRingBufferSize)); for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) { @@ -113,7 +106,7 @@ R3Stretcher::R3Stretcher(Parameters parameters, int fftSize = band.fftSize; m_channelData[c]->scales[fftSize] = std::make_shared - (fftSize, m_timeDomainFrameLength); + (fftSize, m_guideConfiguration.longestFftSize); } } @@ -283,7 +276,7 @@ R3Stretcher::createResampler() } resamplerParameters.initialSampleRate = m_parameters.sampleRate; - resamplerParameters.maxBufferSize = m_timeDomainFrameLength; + resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; if (isRealTime()) { // If we knew the caller would never change ratio, we could @@ -457,7 +450,7 @@ R3Stretcher::getPreferredStartPad() const if (!isRealTime()) { return 0; } else { - return m_timeDomainFrameLength / 2; + return m_windowSourceBuffer.size() / 2; } } @@ -468,7 +461,7 @@ R3Stretcher::getStartDelay() const return 0; } else { double factor = 0.5 / m_pitchScale; - return size_t(ceil(m_timeDomainFrameLength * factor)); + return size_t(ceil(m_windowSourceBuffer.size() * factor)); } } @@ -542,8 +535,8 @@ R3Stretcher::getSamplesRequired() const { if (available() != 0) return 0; int rs = 
m_channelData[0]->inbuf->getReadSpace(); - if (rs < m_timeDomainFrameLength) { - return m_timeDomainFrameLength - rs; + if (rs < m_windowSourceBuffer.size()) { + return m_windowSourceBuffer.size() - rs; } else { return 0; } @@ -553,7 +546,7 @@ void R3Stretcher::setMaxProcessSize(size_t n) { size_t oldSize = m_channelData[0]->inbuf->getSize(); - size_t newSize = m_timeDomainFrameLength + n; + size_t newSize = m_windowSourceBuffer.size() + n; if (newSize > oldSize) { m_log.log(1, "setMaxProcessSize: resizing from and to", oldSize, newSize); @@ -612,7 +605,7 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) // don't do this -- it's better to start with a swoosh // than introduce more latency, and we don't want gaps // when the ratio changes. - int pad = m_timeDomainFrameLength / 2; + int pad = m_windowSourceBuffer.size() / 2; m_log.log(1, "offline mode: prefilling with", pad); for (int c = 0; c < m_parameters.channels; ++c) { m_channelData[c]->inbuf->zero(pad); @@ -744,7 +737,7 @@ R3Stretcher::consume() // the map iterators int readSpace = cd0->inbuf->getReadSpace(); - if (readSpace < m_timeDomainFrameLength) { + if (readSpace < m_windowSourceBuffer.size()) { if (m_mode == ProcessMode::Finished) { if (readSpace == 0) { int fill = cd0->scales.at(longest)->accumulatorFill; diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index c97887a..1da9bea 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -132,7 +132,7 @@ protected: FixedVector accumulator; int accumulatorFill; - ChannelScaleData(int _fftSize, int _timeDomainLength) : + ChannelScaleData(int _fftSize, int _longestFftSize) : fftSize(_fftSize), bufSize(fftSize/2 + 1), timeDomain(fftSize, 0.f), @@ -143,7 +143,7 @@ protected: advancedPhase(bufSize, 0.f), prevMag(bufSize, 0.f), pendingKick(bufSize, 0.f), - accumulator(_timeDomainLength, 0.f), + accumulator(_longestFftSize, 0.f), accumulatorFill(0) { } @@ -202,7 +202,7 @@ protected: std::unique_ptr formant; 
ChannelData(BinSegmenter::Parameters segmenterParameters, BinClassifier::Parameters classifierParameters, - int timeDomainLength, + int longestFftSize, int inRingBufferSize, int outRingBufferSize) : scales(), @@ -215,7 +215,7 @@ protected: BinClassifier::Classification::Residual), segmenter(new BinSegmenter(segmenterParameters)), segmentation(), prevSegmentation(), nextSegmentation(), - mixdown(timeDomainLength, 0.f), + mixdown(longestFftSize, 0.f), resampled(outRingBufferSize, 0.f), inbuf(new RingBuffer(inRingBufferSize)), outbuf(new RingBuffer(outRingBufferSize)), @@ -297,7 +297,7 @@ protected: std::map> m_scaleData; Guide m_guide; Guide::Configuration m_guideConfiguration; - int m_timeDomainFrameLength; + FixedVector m_windowSourceBuffer; ChannelAssembly m_channelAssembly; std::unique_ptr m_calculator; std::unique_ptr m_resampler; @@ -376,6 +376,15 @@ protected: return m_parameters.options & RubberBandStretcher::OptionWindowShort; } + + int getWindowSourceBufferLength() const { + if (m_guideConfiguration.longestFftSize > + m_guideConfiguration.classificationFftSize) { + return m_guideConfiguration.longestFftSize; + } else { + return (m_guideConfiguration.classificationFftSize * 3) / 2; + } + } }; } From fe9e86bc3c6dad85bb7082359aa6a39d746b4a80 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 4 Aug 2022 10:31:36 +0100 Subject: [PATCH 13/41] Per-channel window source buffer, & connect it up --- src/finer/R3Stretcher.cpp | 72 ++++++++++++++++++--------------------- src/finer/R3Stretcher.h | 6 ++-- 2 files changed, 37 insertions(+), 41 deletions(-) diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 2ee86ca..561e527 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -44,7 +44,6 @@ R3Stretcher::R3Stretcher(Parameters parameters, m_parameters.options & RubberBandStretcher::OptionWindowShort), m_log), m_guideConfiguration(m_guide.getConfiguration()), - m_windowSourceBuffer(getWindowSourceBufferLength()), 
m_channelAssembly(m_parameters.channels), m_inhop(1), m_prevInhop(1), @@ -91,14 +90,15 @@ R3Stretcher::R3Stretcher(Parameters parameters, BinClassifier::Parameters classifierParameters (classificationBins, 9, 1, 10, 2.0, 2.0); - int inRingBufferSize = getWindowSourceBufferLength() * 2; - int outRingBufferSize = getWindowSourceBufferLength() * 16; + int inRingBufferSize = getWindowSourceSize() * 2; + int outRingBufferSize = getWindowSourceSize() * 16; for (int c = 0; c < m_parameters.channels; ++c) { m_channelData.push_back(std::make_shared (segmenterParameters, classifierParameters, m_guideConfiguration.longestFftSize, + getWindowSourceSize(), inRingBufferSize, outRingBufferSize)); for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) { @@ -450,7 +450,7 @@ R3Stretcher::getPreferredStartPad() const if (!isRealTime()) { return 0; } else { - return m_windowSourceBuffer.size() / 2; + return getWindowSourceSize() / 2; } } @@ -461,7 +461,7 @@ R3Stretcher::getStartDelay() const return 0; } else { double factor = 0.5 / m_pitchScale; - return size_t(ceil(m_windowSourceBuffer.size() * factor)); + return size_t(ceil(getWindowSourceSize() * factor)); } } @@ -535,8 +535,8 @@ R3Stretcher::getSamplesRequired() const { if (available() != 0) return 0; int rs = m_channelData[0]->inbuf->getReadSpace(); - if (rs < m_windowSourceBuffer.size()) { - return m_windowSourceBuffer.size() - rs; + if (rs < getWindowSourceSize()) { + return getWindowSourceSize() - rs; } else { return 0; } @@ -546,7 +546,7 @@ void R3Stretcher::setMaxProcessSize(size_t n) { size_t oldSize = m_channelData[0]->inbuf->getSize(); - size_t newSize = m_windowSourceBuffer.size() + n; + size_t newSize = getWindowSourceSize() + n; if (newSize > oldSize) { m_log.log(1, "setMaxProcessSize: resizing from and to", oldSize, newSize); @@ -605,7 +605,7 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) // don't do this -- it's better to start with a swoosh // than introduce more latency, 
and we don't want gaps // when the ratio changes. - int pad = m_windowSourceBuffer.size() / 2; + int pad = getWindowSourceSize() / 2; m_log.log(1, "offline mode: prefilling with", pad); for (int c = 0; c < m_parameters.channels; ++c) { m_channelData[c]->inbuf->zero(pad); @@ -737,7 +737,7 @@ R3Stretcher::consume() // the map iterators int readSpace = cd0->inbuf->getReadSpace(); - if (readSpace < m_windowSourceBuffer.size()) { + if (readSpace < getWindowSourceSize()) { if (m_mode == ProcessMode::Finished) { if (readSpace == 0) { int fill = cd0->scales.at(longest)->accumulatorFill; @@ -879,34 +879,33 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) { Profiler profiler("R3Stretcher::analyseChannel"); - int longest = m_guideConfiguration.longestFftSize; - int classify = m_guideConfiguration.classificationFftSize; - auto &cd = m_channelData.at(c); - process_t *buf = cd->scales.at(longest)->timeDomain.data(); - //!!! review - + int bufSize = cd->windowSource.size(); + process_t *buf = cd->windowSource.data(); + int readSpace = cd->inbuf->getReadSpace(); - if (readSpace < longest) { + if (readSpace < bufSize) { cd->inbuf->peek(buf, readSpace); - v_zero(buf + readSpace, longest - readSpace); + v_zero(buf + readSpace, bufSize - readSpace); } else { - cd->inbuf->peek(buf, longest); + cd->inbuf->peek(buf, bufSize); } - // We have a single unwindowed frame at the longest FFT size - // ("scale"). Populate the shorter FFT sizes from the centre of - // it, windowing as we copy. The classification scale is handled - // separately because it has readahead, so skip it here as well as - // the longest. (In practice this means we are probably only - // populating one scale in multi-window mode, and none at all in - // single-window mode) + // We have an unwindowed time-domain frame in buf that is as long + // as required for the union of all FFT sizes and readahead + // hops. 
Populate the various sizes from it with aligned centres, + // windowing as we copy. The classification scale is handled + // separately because it has readahead, so skip it here. (In + // single-window mode that means we do nothing here, since the + // classification scale is the only one.) + + int classify = m_guideConfiguration.classificationFftSize; for (auto &it: cd->scales) { int fftSize = it.first; - if (fftSize == classify || fftSize == longest) continue; - int offset = (longest - fftSize) / 2; + if (fftSize == classify) continue; + int offset = (bufSize - fftSize) / 2; m_scaleData.at(fftSize)->analysisWindow.cut (buf + offset, it.second->timeDomain.data()); } @@ -918,27 +917,22 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) ClassificationReadaheadData &readahead = cd->readahead; m_scaleData.at(classify)->analysisWindow.cut - (buf + (longest - classify) / 2 + inhop, + (buf + (bufSize - classify) / 2 + inhop, readahead.timeDomain.data()); - // If inhop has changed since the previous frame, we'll have to - // populate the classification scale (but for analysis/resynthesis - // rather than classification) anew rather than reuse the previous - // readahead. Pity... + // If inhop has changed since the previous frame, we must populate + // the classification scale (but for analysis/resynthesis rather + // than classification) anew rather than reuse the previous + // frame's readahead. bool haveValidReadahead = cd->haveReadahead; if (inhop != prevInhop) haveValidReadahead = false; if (!haveValidReadahead) { m_scaleData.at(classify)->analysisWindow.cut - (buf + (longest - classify) / 2, + (buf + (bufSize - classify) / 2, classifyScale->timeDomain.data()); } - - // Finally window the longest scale - if (classify != longest) { - m_scaleData.at(longest)->analysisWindow.cut(buf); - } // FFT shift, forward FFT, and carry out cartesian-polar // conversion for each FFT size. 
diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index 1da9bea..3abae36 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -185,6 +185,7 @@ protected: struct ChannelData { std::map> scales; + FixedVector windowSource; ClassificationReadaheadData readahead; bool haveReadahead; std::unique_ptr classifier; @@ -203,9 +204,11 @@ protected: ChannelData(BinSegmenter::Parameters segmenterParameters, BinClassifier::Parameters classifierParameters, int longestFftSize, + int windowSourceSize, int inRingBufferSize, int outRingBufferSize) : scales(), + windowSource(windowSourceSize, 0.0), readahead(segmenterParameters.fftSize), haveReadahead(false), classifier(new BinClassifier(classifierParameters)), @@ -297,7 +300,6 @@ protected: std::map> m_scaleData; Guide m_guide; Guide::Configuration m_guideConfiguration; - FixedVector m_windowSourceBuffer; ChannelAssembly m_channelAssembly; std::unique_ptr m_calculator; std::unique_ptr m_resampler; @@ -377,7 +379,7 @@ protected: RubberBandStretcher::OptionWindowShort; } - int getWindowSourceBufferLength() const { + int getWindowSourceSize() const { if (m_guideConfiguration.longestFftSize > m_guideConfiguration.classificationFftSize) { return m_guideConfiguration.longestFftSize; From 5b26e90a580d0e583bc37675b510cbf84a3ddcc6 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 4 Aug 2022 11:19:13 +0100 Subject: [PATCH 14/41] Fix to windowing offsets --- src/finer/R3Stretcher.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 561e527..160b457 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -881,15 +881,15 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) auto &cd = m_channelData.at(c); - int bufSize = cd->windowSource.size(); + int sourceSize = cd->windowSource.size(); process_t *buf = cd->windowSource.data(); int readSpace = cd->inbuf->getReadSpace(); - 
if (readSpace < bufSize) { + if (readSpace < sourceSize) { cd->inbuf->peek(buf, readSpace); - v_zero(buf + readSpace, bufSize - readSpace); + v_zero(buf + readSpace, sourceSize - readSpace); } else { - cd->inbuf->peek(buf, bufSize); + cd->inbuf->peek(buf, sourceSize); } // We have an unwindowed time-domain frame in buf that is as long @@ -900,12 +900,13 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) // single-window mode that means we do nothing here, since the // classification scale is the only one.) + int longest = m_guideConfiguration.longestFftSize; int classify = m_guideConfiguration.classificationFftSize; for (auto &it: cd->scales) { int fftSize = it.first; if (fftSize == classify) continue; - int offset = (bufSize - fftSize) / 2; + int offset = (longest - fftSize) / 2; m_scaleData.at(fftSize)->analysisWindow.cut (buf + offset, it.second->timeDomain.data()); } @@ -915,9 +916,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) auto &classifyScale = cd->scales.at(classify); ClassificationReadaheadData &readahead = cd->readahead; - + m_scaleData.at(classify)->analysisWindow.cut - (buf + (bufSize - classify) / 2 + inhop, + (buf + (longest - classify) / 2 + inhop, readahead.timeDomain.data()); // If inhop has changed since the previous frame, we must populate @@ -930,7 +931,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) if (!haveValidReadahead) { m_scaleData.at(classify)->analysisWindow.cut - (buf + (bufSize - classify) / 2, + (buf + (longest - classify) / 2, classifyScale->timeDomain.data()); } From 7833e1bff107ac1a4fbecc6fa66edd495261e0c5 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 4 Aug 2022 12:08:15 +0100 Subject: [PATCH 15/41] Avoid kick analysis in single-window mode; comment update --- src/finer/Guide.h | 39 ++++++++++++++++++++++----------------- src/finer/R3Stretcher.cpp | 6 ++---- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git 
a/src/finer/Guide.h b/src/finer/Guide.h index 78f685a..83667b4 100644 --- a/src/finer/Guide.h +++ b/src/finer/Guide.h @@ -289,15 +289,29 @@ public: guidance.channelLock.f1 = 600.0; } - bool kick = - (segmentation.percussiveBelow > 40.0) && - (prevSegmentation.percussiveBelow < 40.0) && - checkPotentialKick(magnitudes, prevMagnitudes); + if (!m_parameters.singleWindowMode) { + + bool kick = + (segmentation.percussiveBelow > 40.0) && + (prevSegmentation.percussiveBelow < 40.0) && + checkPotentialKick(magnitudes, prevMagnitudes); - bool futureKick = !kick && - (nextSegmentation.percussiveBelow > 40.0) && - (segmentation.percussiveBelow < 40.0) && - checkPotentialKick(nextMagnitudes, magnitudes); + bool futureKick = !kick && + (nextSegmentation.percussiveBelow > 40.0) && + (segmentation.percussiveBelow < 40.0) && + checkPotentialKick(nextMagnitudes, magnitudes); + + if (kick) { + guidance.kick.present = true; + guidance.kick.f0 = 0.0; + guidance.kick.f1 = segmentation.percussiveBelow; + } else if (futureKick) { + guidance.preKick.present = true; + guidance.preKick.f0 = 0.0; + guidance.preKick.f1 = nextSegmentation.percussiveBelow; + } + } + /* std::cout << "d:" << prevSegmentation.percussiveBelow << "," @@ -308,15 +322,6 @@ public: << (kick ? "K" : "N") << "," << (futureKick ? 
"F" : "N") << std::endl; */ - if (kick) { - guidance.kick.present = true; - guidance.kick.f0 = 0.0; - guidance.kick.f1 = segmentation.percussiveBelow; - } else if (futureKick) { - guidance.preKick.present = true; - guidance.preKick.f0 = 0.0; - guidance.preKick.f1 = nextSegmentation.percussiveBelow; - } if (segmentation.residualAbove > segmentation.percussiveAbove) { guidance.highUnlocked.present = true; diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 160b457..0aa74ac 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -281,8 +281,8 @@ R3Stretcher::createResampler() if (isRealTime()) { // If we knew the caller would never change ratio, we could // supply RatioMostlyFixed - but it can have such overhead - // when the ratio *does* change that a single call would kill - // RT use, so it's not a good idea + // when the ratio *does* change (and it's not RT-safe overhead + // either) that a single call would kill RT use resamplerParameters.dynamism = Resampler::RatioOftenChanging; resamplerParameters.ratioChange = Resampler::SmoothRatioChange; } else { @@ -1193,8 +1193,6 @@ R3Stretcher::adjustFormant(int c) void R3Stretcher::adjustPreKick(int c) { - //!!! if we aren't going to do this, we should modify Guide so as - //!!! 
not to do the small additional work of checking for it if (isSingleWindowed()) return; Profiler profiler("R3Stretcher::adjustPreKick"); From 76ee5998f16b5c4b5ea481065f2b7d376a85661b Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 4 Aug 2022 13:31:33 +0100 Subject: [PATCH 16/41] Permit rational precision to vary by quality setting in RatioOftenChanging mode --- src/common/BQResampler.cpp | 5 ++++- src/common/BQResampler.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/common/BQResampler.cpp b/src/common/BQResampler.cpp index 2ac5f50..30d3d38 100644 --- a/src/common/BQResampler.cpp +++ b/src/common/BQResampler.cpp @@ -122,6 +122,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 70.0; k_transition = 0.2; cut = 0.9; + rational_max = 48000; break; case FastestTolerable: p_multiple = 62; @@ -129,6 +130,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 90.0; k_transition = 0.05; cut = 0.975; + rational_max = 96000; break; case Best: p_multiple = 122; @@ -136,6 +138,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 100.0; k_transition = 0.01; cut = 0.995; + rational_max = 192000; break; } } @@ -375,7 +378,7 @@ BQResampler::pick_params(double ratio) const { // Farey algorithm, see // https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/ - int max_denom = 192000; + int max_denom = m_qparams.rational_max; double a = 0.0, b = 1.0, c = 1.0, d = 0.0; double pa = a, pb = b, pc = c, pd = d; double eps = 1e-9; diff --git a/src/common/BQResampler.h b/src/common/BQResampler.h index 4858096..7319c5d 100644 --- a/src/common/BQResampler.h +++ b/src/common/BQResampler.h @@ -71,6 +71,7 @@ private: double k_snr; double k_transition; double cut; + int rational_max; QualityParams(Quality); }; From 63ebc451076c8fdbae3c7a10b59df193f543a598 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 4 Aug 2022 13:34:31 +0100 Subject: [PATCH 17/41] ... 
I said, in RatioOftenChanging mode --- src/common/BQResampler.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/common/BQResampler.cpp b/src/common/BQResampler.cpp index 30d3d38..384e159 100644 --- a/src/common/BQResampler.cpp +++ b/src/common/BQResampler.cpp @@ -378,7 +378,12 @@ BQResampler::pick_params(double ratio) const { // Farey algorithm, see // https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/ - int max_denom = m_qparams.rational_max; + int max_denom; + if (m_dynamism == RatioMostlyFixed) { + max_denom = 192000; + } else { + max_denom = m_qparams.rational_max; + } double a = 0.0, b = 1.0, c = 1.0, d = 0.0; double pa = a, pb = b, pc = c, pd = d; double eps = 1e-9; From 3eae190a85030688b7e8639b47ea8d89735560e3 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 4 Aug 2022 16:31:23 +0100 Subject: [PATCH 18/41] This is not actually usually a serious problem, unlike the <1 case which can be genuinely troublesome --- src/finer/R3Stretcher.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 0aa74ac..1513158 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -336,7 +336,7 @@ R3Stretcher::calculateHop() inhop = 1.0; } if (inhop > 1024.0) { - m_log.log(0, "WARNING: Extreme ratio yields ideal inhop > 1024, results may be suspect", ratio, inhop); + m_log.log(1, "WARNING: Ratio yields ideal inhop > 1024, results may be suspect", ratio, inhop); inhop = 1024.0; } From 4aea160ec3c0f09bdd375483c9952a37a2b404c7 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 4 Aug 2022 16:58:00 +0100 Subject: [PATCH 19/41] Indentation only --- src/faster/R2Stretcher.cpp | 3 +-- src/faster/StretcherChannelData.cpp | 2 +- src/faster/StretcherProcess.cpp | 42 +++++++++++++++-------------- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/faster/R2Stretcher.cpp b/src/faster/R2Stretcher.cpp index 4033c11..b23c515 100644 
--- a/src/faster/R2Stretcher.cpp +++ b/src/faster/R2Stretcher.cpp @@ -318,8 +318,7 @@ R2Stretcher::setMaxProcessSize(size_t samples) } void -R2Stretcher::setKeyFrameMap(const std::map & - mapping) +R2Stretcher::setKeyFrameMap(const std::map &mapping) { if (m_realtime) { m_log.log(0, "R2Stretcher::setKeyFrameMap: Cannot specify key frame map in RT mode"); diff --git a/src/faster/StretcherChannelData.cpp b/src/faster/StretcherChannelData.cpp index 81c47d2..376b76c 100644 --- a/src/faster/StretcherChannelData.cpp +++ b/src/faster/StretcherChannelData.cpp @@ -111,7 +111,7 @@ R2Stretcher::ChannelData::construct(const std::set &sizes, void R2Stretcher::ChannelData::setSizes(size_t windowSize, - size_t fftSize) + size_t fftSize) { size_t maxSize = 2 * std::max(windowSize, fftSize); size_t realSize = maxSize / 2 + 1; diff --git a/src/faster/StretcherProcess.cpp b/src/faster/StretcherProcess.cpp index 7044c7a..5074a93 100644 --- a/src/faster/StretcherProcess.cpp +++ b/src/faster/StretcherProcess.cpp @@ -128,10 +128,10 @@ R2Stretcher::resampleBeforeStretching() const void R2Stretcher::prepareChannelMS(size_t c, - const float *const *inputs, - size_t offset, - size_t samples, - float *prepared) + const float *const *inputs, + size_t offset, + size_t samples, + float *prepared) { for (size_t i = 0; i < samples; ++i) { float left = inputs[0][i + offset]; @@ -148,10 +148,10 @@ R2Stretcher::prepareChannelMS(size_t c, size_t R2Stretcher::consumeChannel(size_t c, - const float *const *inputs, - size_t offset, - size_t samples, - bool final) + const float *const *inputs, + size_t offset, + size_t samples, + bool final) { Profiler profiler("R2Stretcher::consumeChannel"); @@ -405,9 +405,9 @@ R2Stretcher::testInbufReadSpace(size_t c) bool R2Stretcher::processChunkForChannel(size_t c, - size_t phaseIncrement, - size_t shiftIncrement, - bool phaseReset) + size_t phaseIncrement, + size_t shiftIncrement, + bool phaseReset) { Profiler profiler("R2Stretcher::processChunkForChannel"); @@ 
-499,8 +499,8 @@ R2Stretcher::processChunkForChannel(size_t c, void R2Stretcher::calculateIncrements(size_t &phaseIncrementRtn, - size_t &shiftIncrementRtn, - bool &phaseReset) + size_t &shiftIncrementRtn, + bool &phaseReset) { Profiler profiler("R2Stretcher::calculateIncrements"); @@ -629,9 +629,9 @@ R2Stretcher::calculateIncrements(size_t &phaseIncrementRtn, bool R2Stretcher::getIncrements(size_t channel, - size_t &phaseIncrementRtn, - size_t &shiftIncrementRtn, - bool &phaseReset) + size_t &phaseIncrementRtn, + size_t &shiftIncrementRtn, + bool &phaseReset) { Profiler profiler("R2Stretcher::getIncrements"); @@ -723,8 +723,8 @@ R2Stretcher::analyseChunk(size_t channel) void R2Stretcher::modifyChunk(size_t channel, - size_t outputIncrement, - bool phaseReset) + size_t outputIncrement, + bool phaseReset) { Profiler profiler("R2Stretcher::modifyChunk"); @@ -923,7 +923,7 @@ R2Stretcher::formantShiftChunk(size_t channel) void R2Stretcher::synthesiseChunk(size_t channel, - size_t shiftIncrement) + size_t shiftIncrement) { Profiler profiler("R2Stretcher::synthesiseChunk"); @@ -1085,7 +1085,9 @@ R2Stretcher::writeChunk(size_t channel, size_t shiftIncrement, bool last) } void -R2Stretcher::writeOutput(RingBuffer &to, float *from, size_t qty, size_t &outCount, size_t theoreticalOut) +R2Stretcher::writeOutput(RingBuffer &to, + float *from, size_t qty, + size_t &outCount, size_t theoreticalOut) { Profiler profiler("R2Stretcher::writeOutput"); From 6ecc973d41019d87499677b39cc35ef2843d31d3 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 5 Aug 2022 14:58:12 +0100 Subject: [PATCH 20/41] Keep a structure with recommended limits for inhop/outhop in current configuration; skip readahead entirely when inhop is greater than a certain limit - allowing us to reduce start delay further in short-window mode --- src/finer/R3Stretcher.cpp | 203 +++++++++++++++++++++++--------------- src/finer/R3Stretcher.h | 30 +++++- 2 files changed, 152 insertions(+), 81 deletions(-) diff --git 
a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 1513158..ca48abe 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -35,6 +35,7 @@ R3Stretcher::R3Stretcher(Parameters parameters, double initialPitchScale, Log log) : m_parameters(parameters), + m_limits(parameters.options), m_log(log), m_timeRatio(initialTimeRatio), m_pitchScale(initialPitchScale), @@ -45,6 +46,7 @@ R3Stretcher::R3Stretcher(Parameters parameters, m_log), m_guideConfiguration(m_guide.getConfiguration()), m_channelAssembly(m_parameters.channels), + m_useReadahead(true), m_inhop(1), m_prevInhop(1), m_prevOuthop(1), @@ -315,9 +317,7 @@ R3Stretcher::calculateHop() proposedOuthop = pow(2.0, 8.0 + 2.0 * log10(ratio - 0.5)); } else if (ratio < 1.0) { proposedOuthop = pow(2.0, 8.0 + 2.0 * log10(ratio)); - } - if (proposedOuthop > 512.0) proposedOuthop = 512.0; - if (proposedOuthop < 128.0) proposedOuthop = 128.0; + } if (isSingleWindowed()) { // the single (shorter) window mode actually uses a longer @@ -325,24 +325,38 @@ R3Stretcher::calculateHop() // 1024-bin one, so it can survive longer hops, which is good // because reduced CPU consumption is the whole motivation proposedOuthop *= 2.0; - if (proposedOuthop > 640.0) proposedOuthop = 640.0; + } + + if (proposedOuthop > m_limits.maxPreferredOuthop) { + proposedOuthop = m_limits.maxPreferredOuthop; + } + if (proposedOuthop < m_limits.minPreferredOuthop) { + proposedOuthop = m_limits.minPreferredOuthop; } m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop); double inhop = proposedOuthop / ratio; - if (inhop < 1.0) { - m_log.log(0, "WARNING: Extreme ratio yields ideal inhop < 1, results may be suspect", ratio, inhop); - inhop = 1.0; + if (inhop < m_limits.minInhop) { + m_log.log(0, "WARNING: Ratio yields ideal inhop < minimum, results may be suspect", inhop, m_limits.minInhop); + inhop = m_limits.minInhop; } - if (inhop > 1024.0) { - m_log.log(1, "WARNING: Ratio yields ideal inhop > 1024, results 
may be suspect", ratio, inhop); - inhop = 1024.0; + if (inhop > m_limits.maxInhop) { + // Log level 1, this is not as big a deal as < minInhop above + m_log.log(1, "WARNING: Ratio yields ideal inhop > maximum, results may be suspect", inhop, m_limits.maxInhop); + inhop = m_limits.maxInhop; } m_inhop = int(floor(inhop)); - m_log.log(1, "calculateHop: inhop and mean outhop", m_inhop, m_inhop * ratio); + + if (m_inhop < m_limits.maxInhopWithReadahead) { + m_log.log(1, "calculateHop: using readahead"); + m_useReadahead = true; + } else { + m_log.log(1, "calculateHop: not using readahead, inhop too long for buffer in current configuration"); + m_useReadahead = false; + } } void @@ -911,25 +925,30 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) (buf + offset, it.second->timeDomain.data()); } - // The classification scale has a one-hop readahead, so populate - // the readahead from further down the long unwindowed frame. - auto &classifyScale = cd->scales.at(classify); ClassificationReadaheadData &readahead = cd->readahead; + bool copyFromReadahead = false; - m_scaleData.at(classify)->analysisWindow.cut - (buf + (longest - classify) / 2 + inhop, - readahead.timeDomain.data()); + if (m_useReadahead) { + + // The classification scale has a one-hop readahead, so + // populate the readahead from further down the long + // unwindowed frame. - // If inhop has changed since the previous frame, we must populate - // the classification scale (but for analysis/resynthesis rather - // than classification) anew rather than reuse the previous - // frame's readahead. 
+ m_scaleData.at(classify)->analysisWindow.cut + (buf + (longest - classify) / 2 + inhop, + readahead.timeDomain.data()); - bool haveValidReadahead = cd->haveReadahead; - if (inhop != prevInhop) haveValidReadahead = false; + // If inhop has changed since the previous frame, we must + // populate the classification scale (but for + // analysis/resynthesis rather than classification) anew + // rather than reuse the previous frame's readahead. - if (!haveValidReadahead) { + copyFromReadahead = cd->haveReadahead; + if (inhop != prevInhop) copyFromReadahead = false; + } + + if (!copyFromReadahead) { m_scaleData.at(classify)->analysisWindow.cut (buf + (longest - classify) / 2, classifyScale->timeDomain.data()); @@ -944,51 +963,54 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) // where the inhop has changed as above, in which case we need to // do both readahead and current) - if (haveValidReadahead) { - v_copy(classifyScale->mag.data(), - readahead.mag.data(), - classifyScale->bufSize); - v_copy(classifyScale->phase.data(), - readahead.phase.data(), - classifyScale->bufSize); - } + if (m_useReadahead) { - v_fftshift(readahead.timeDomain.data(), classify); - m_scaleData.at(classify)->fft.forward(readahead.timeDomain.data(), - classifyScale->real.data(), - classifyScale->imag.data()); - - for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) { - const auto &band = m_guideConfiguration.fftBandLimits[b]; - if (band.fftSize == classify) { - ToPolarSpec spec; - spec.magFromBin = 0; - spec.magBinCount = classify/2 + 1; - spec.polarFromBin = band.b0min; - spec.polarBinCount = band.b1max - band.b0min + 1; - convertToPolar(readahead.mag.data(), - readahead.phase.data(), - classifyScale->real.data(), - classifyScale->imag.data(), - spec); - - v_scale(classifyScale->mag.data(), - 1.0 / double(classify), - classifyScale->mag.size()); - break; + if (copyFromReadahead) { + v_copy(classifyScale->mag.data(), + readahead.mag.data(), + 
classifyScale->bufSize); + v_copy(classifyScale->phase.data(), + readahead.phase.data(), + classifyScale->bufSize); } - } - cd->haveReadahead = true; + v_fftshift(readahead.timeDomain.data(), classify); + m_scaleData.at(classify)->fft.forward(readahead.timeDomain.data(), + classifyScale->real.data(), + classifyScale->imag.data()); + + for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) { + const auto &band = m_guideConfiguration.fftBandLimits[b]; + if (band.fftSize == classify) { + ToPolarSpec spec; + spec.magFromBin = 0; + spec.magBinCount = classify/2 + 1; + spec.polarFromBin = band.b0min; + spec.polarBinCount = band.b1max - band.b0min + 1; + convertToPolar(readahead.mag.data(), + readahead.phase.data(), + classifyScale->real.data(), + classifyScale->imag.data(), + spec); + + v_scale(classifyScale->mag.data(), + 1.0 / double(classify), + classifyScale->mag.size()); + break; + } + } + + cd->haveReadahead = true; + } // For the others (and the classify as well, if the inhop has - // changed or we haven't filled the readahead yet) we operate - // directly in the scale data and restrict the range for - // cartesian-polar conversion + // changed or we aren't using readahead or haven't filled the + // readahead yet) we operate directly in the scale data and + // restrict the range for cartesian-polar conversion for (auto &it: cd->scales) { int fftSize = it.first; - if (fftSize == classify && haveValidReadahead) { + if (fftSize == classify && copyFromReadahead) { continue; } @@ -1010,8 +1032,8 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) // range, as all the magnitudes (though not // necessarily all phases) are potentially relevant to // classification and formant analysis. 
But this case - // here only happens if we don't haveValidReadahead - - // the normal case is above and just copies from the + // here only happens if we don't copyFromReadahead - + // the normal case is above and, er, copies from the // previous readahead. if (fftSize == classify) { spec.magFromBin = 0; @@ -1050,8 +1072,14 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) v_copy(cd->classification.data(), cd->nextClassification.data(), cd->classification.size()); - cd->classifier->classify(readahead.mag.data(), - cd->nextClassification.data()); + + if (m_useReadahead) { + cd->classifier->classify(readahead.mag.data(), + cd->nextClassification.data()); + } else { + cd->classifier->classify(classifyScale->mag.data(), + cd->nextClassification.data()); + } cd->prevSegmentation = cd->segmentation; cd->segmentation = cd->nextSegmentation; @@ -1090,20 +1118,39 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop) bool tighterChannelLock = m_parameters.options & RubberBandStretcher::OptionChannelsTogether; + + double magMean = v_mean(classifyScale->mag.data() + 1, classify/2); + + if (m_useReadahead) { + m_guide.updateGuidance(ratio, + prevOuthop, + classifyScale->mag.data(), + classifyScale->prevMag.data(), + cd->readahead.mag.data(), + cd->segmentation, + cd->prevSegmentation, + cd->nextSegmentation, + magMean, + m_unityCount, + isRealTime(), + tighterChannelLock, + cd->guidance); + } else { + m_guide.updateGuidance(ratio, + prevOuthop, + classifyScale->prevMag.data(), + classifyScale->prevMag.data(), + classifyScale->mag.data(), + cd->segmentation, + cd->prevSegmentation, + cd->nextSegmentation, + magMean, + m_unityCount, + isRealTime(), + tighterChannelLock, + cd->guidance); + } - m_guide.updateGuidance(ratio, - prevOuthop, - classifyScale->mag.data(), - classifyScale->prevMag.data(), - cd->readahead.mag.data(), - cd->segmentation, - cd->prevSegmentation, - cd->nextSegmentation, - v_mean(classifyScale->mag.data() 
+ 1, classify/2), - m_unityCount, - isRealTime(), - tighterChannelLock, - cd->guidance); /* if (c == 0) { if (cd->guidance.kick.present) { diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index 3abae36..b3032ae 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -103,6 +103,27 @@ public: } protected: + struct Limits { + int minPreferredOuthop; + int maxPreferredOuthop; + int minInhop; + int maxInhopWithReadahead; + int maxInhop; + Limits(RubberBandStretcher::Options options) : + minPreferredOuthop(128), + maxPreferredOuthop(512), + minInhop(1), + maxInhopWithReadahead(1024), + maxInhop(1024) { + if (options & RubberBandStretcher::OptionWindowShort) { + // See note in calculateHop + maxPreferredOuthop = 640; + maxInhopWithReadahead = 512; + maxInhop = 1024; + } + } + }; + struct ClassificationReadaheadData { FixedVector timeDomain; FixedVector mag; @@ -290,6 +311,7 @@ protected: }; Parameters m_parameters; + const Limits m_limits; Log m_log; std::atomic m_timeRatio; @@ -303,6 +325,7 @@ protected: ChannelAssembly m_channelAssembly; std::unique_ptr m_calculator; std::unique_ptr m_resampler; + bool m_useReadahead; std::atomic m_inhop; int m_prevInhop; int m_prevOuthop; @@ -380,11 +403,12 @@ protected: } int getWindowSourceSize() const { - if (m_guideConfiguration.longestFftSize > - m_guideConfiguration.classificationFftSize) { + int sz = m_guideConfiguration.classificationFftSize + + m_limits.maxInhopWithReadahead; + if (m_guideConfiguration.longestFftSize > sz) { return m_guideConfiguration.longestFftSize; } else { - return (m_guideConfiguration.classificationFftSize * 3) / 2; + return sz; } } }; From f027a59fa490e1ca521a0f4a8ff5a23522a980b7 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Mon, 8 Aug 2022 16:55:53 +0100 Subject: [PATCH 21/41] I don't like adjusting the rational approximation - it changes (even if only nominally) the effective ratio based on quality. 
I think this is more appropriate --- src/common/BQResampler.cpp | 27 ++++++++++++++++++--------- src/common/BQResampler.h | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/src/common/BQResampler.cpp b/src/common/BQResampler.cpp index 384e159..a3ae269 100644 --- a/src/common/BQResampler.cpp +++ b/src/common/BQResampler.cpp @@ -122,7 +122,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 70.0; k_transition = 0.2; cut = 0.9; - rational_max = 48000; + length_max = 576000; break; case FastestTolerable: p_multiple = 62; @@ -130,7 +130,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 90.0; k_transition = 0.05; cut = 0.975; - rational_max = 96000; + length_max = 1024000; break; case Best: p_multiple = 122; @@ -138,7 +138,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 100.0; k_transition = 0.01; cut = 0.995; - rational_max = 192000; + length_max = 0; break; } } @@ -378,12 +378,7 @@ BQResampler::pick_params(double ratio) const { // Farey algorithm, see // https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/ - int max_denom; - if (m_dynamism == RatioMostlyFixed) { - max_denom = 192000; - } else { - max_denom = m_qparams.rational_max; - } + int max_denom = 192000; double a = 0.0, b = 1.0, c = 1.0, d = 0.0; double pa = a, pb = b, pc = c, pd = d; double eps = 1e-9; @@ -500,6 +495,20 @@ BQResampler::state_for_ratio(BQResampler::state &target_state, target_state.filter_length = int(parameters.peak_to_zero * m_qparams.p_multiple + 1); + if (m_qparams.length_max > 0 && + target_state.filter_length > m_qparams.length_max) { + int reduced_multiple = + int(floor(double(m_qparams.length_max) / parameters.peak_to_zero)); + int reduced_length = + int(parameters.peak_to_zero * reduced_multiple + 1); + if (m_debug_level > 0) { + cerr << "BQResampler: reducing filter length from " + << target_state.filter_length << " to " + << reduced_length << " based on quality settings" << endl; + } + 
target_state.filter_length = reduced_length; + } + if (target_state.filter_length % 2 == 0) { ++target_state.filter_length; } diff --git a/src/common/BQResampler.h b/src/common/BQResampler.h index 7319c5d..99a7ef7 100644 --- a/src/common/BQResampler.h +++ b/src/common/BQResampler.h @@ -71,7 +71,7 @@ private: double k_snr; double k_transition; double cut; - int rational_max; + int length_max; QualityParams(Quality); }; From f81598c166711e223b7018ffb86c2265229113ea Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Mon, 8 Aug 2022 17:02:08 +0100 Subject: [PATCH 22/41] Backed out changeset c03ab2acf74e No, that worked very badly in practice. Revisit --- src/common/BQResampler.cpp | 27 +++++++++------------------ src/common/BQResampler.h | 2 +- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/src/common/BQResampler.cpp b/src/common/BQResampler.cpp index a3ae269..384e159 100644 --- a/src/common/BQResampler.cpp +++ b/src/common/BQResampler.cpp @@ -122,7 +122,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 70.0; k_transition = 0.2; cut = 0.9; - length_max = 576000; + rational_max = 48000; break; case FastestTolerable: p_multiple = 62; @@ -130,7 +130,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 90.0; k_transition = 0.05; cut = 0.975; - length_max = 1024000; + rational_max = 96000; break; case Best: p_multiple = 122; @@ -138,7 +138,7 @@ BQResampler::QualityParams::QualityParams(Quality q) k_snr = 100.0; k_transition = 0.01; cut = 0.995; - length_max = 0; + rational_max = 192000; break; } } @@ -378,7 +378,12 @@ BQResampler::pick_params(double ratio) const { // Farey algorithm, see // https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/ - int max_denom = 192000; + int max_denom; + if (m_dynamism == RatioMostlyFixed) { + max_denom = 192000; + } else { + max_denom = m_qparams.rational_max; + } double a = 0.0, b = 1.0, c = 1.0, d = 0.0; double pa = a, pb = b, pc = c, pd = d; double eps = 1e-9; @@ -495,20 
+500,6 @@ BQResampler::state_for_ratio(BQResampler::state &target_state, target_state.filter_length = int(parameters.peak_to_zero * m_qparams.p_multiple + 1); - if (m_qparams.length_max > 0 && - target_state.filter_length > m_qparams.length_max) { - int reduced_multiple = - int(floor(double(m_qparams.length_max) / parameters.peak_to_zero)); - int reduced_length = - int(parameters.peak_to_zero * reduced_multiple + 1); - if (m_debug_level > 0) { - cerr << "BQResampler: reducing filter length from " - << target_state.filter_length << " to " - << reduced_length << " based on quality settings" << endl; - } - target_state.filter_length = reduced_length; - } - if (target_state.filter_length % 2 == 0) { ++target_state.filter_length; } diff --git a/src/common/BQResampler.h b/src/common/BQResampler.h index 99a7ef7..7319c5d 100644 --- a/src/common/BQResampler.h +++ b/src/common/BQResampler.h @@ -71,7 +71,7 @@ private: double k_snr; double k_transition; double cut; - int length_max; + int rational_max; QualityParams(Quality); }; From 8fee46b704b1f5f4256a71b11130b9b59bdc6889 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 9 Aug 2022 15:50:02 +0100 Subject: [PATCH 23/41] Add SLEEF FFT support --- meson.build | 35 +++++ meson_options.txt | 2 +- src/common/Allocators.h | 6 +- src/common/FFT.cpp | 322 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 360 insertions(+), 5 deletions(-) diff --git a/meson.build b/meson.build index 54951a2..e527bf3 100644 --- a/meson.build +++ b/meson.build @@ -112,6 +112,8 @@ foreach d: get_option('extra_include_dirs') endforeach fftw3_dep = dependency('fftw3', version: '>= 3.0.0', required: false) +sleef_dep = dependency('sleef', version: '>= 3.3.0', required: false) +sleefdft_dep = dependency('sleefdft', version: '>= 3.3.0', required: false) samplerate_dep = dependency('samplerate', version: '>= 0.1.8', required: false) sndfile_dep = dependency('sndfile', version: '>= 1.0.16', required: false) vamp_dep = dependency('vamp-sdk', 
version: '>= 2.9', required: false) @@ -164,6 +166,9 @@ if fft == 'builtin' if fftw3_dep.found() message('(to use FFTW instead, reconfigure with -Dfft=fftw)') endif + if sleef_dep.found() + message('(to use SLEEF instead, reconfigure with -Dfft=sleef)') + endif feature_defines += ['-DUSE_BUILTIN_FFT'] elif fft == 'kissfft' @@ -172,6 +177,9 @@ elif fft == 'kissfft' if fftw3_dep.found() message('(to use FFTW instead, reconfigure with -Dfft=fftw)') endif + if sleef_dep.found() + message('(to use SLEEF instead, reconfigure with -Dfft=sleef)') + endif feature_sources += ['src/ext/kissfft/kiss_fft.c', 'src/ext/kissfft/kiss_fftr.c'] feature_defines += ['-DHAVE_KISSFFT'] general_include_dirs += 'src/ext/kissfft' @@ -180,6 +188,9 @@ elif fft == 'fftw' if fftw3_dep.found() config_summary += { 'FFT': 'FFTW' } message('For FFT: using FFTW') + if sleef_dep.found() + message('(to use SLEEF instead, reconfigure with -Dfft=sleef)') + endif pkgconfig_requirements += fftw3_dep else fftw_dep = cpp.find_library('fftw3', @@ -187,10 +198,34 @@ elif fft == 'fftw' has_headers: ['fftw3.h'], header_args: extra_include_args, required: true) + config_summary += { 'FFT': 'FFTW' } endif feature_dependencies += fftw3_dep feature_defines += ['-DHAVE_FFTW3', '-DFFTW_DOUBLE_ONLY'] +elif fft == 'sleef' + if sleefdft_dep.found() and sleef_dep.found() + config_summary += { 'FFT': 'SLEEF' } + message('For FFT: using SLEEF') + pkgconfig_requirements += sleefdft_dep + pkgconfig_requirements += sleef_dep + else + sleefdft_dep = cpp.find_library('sleefdft', + dirs: get_option('extra_lib_dirs'), + has_headers: ['sleefdft.h'], + header_args: extra_include_args, + required: true) + sleef_dep = cpp.find_library('sleef', + dirs: get_option('extra_lib_dirs'), + has_headers: ['sleef.h'], + header_args: extra_include_args, + required: true) + config_summary += { 'FFT': 'SLEEF' } + endif + feature_dependencies += sleefdft_dep + feature_dependencies += sleef_dep + feature_defines += ['-DHAVE_SLEEF'] + elif fft == 
'vdsp' config_summary += { 'FFT': 'vDSP' } message('For FFT: using vDSP') diff --git a/meson_options.txt b/meson_options.txt index c820c20..62e8337 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,7 +1,7 @@ option('fft', type: 'combo', - choices: ['auto', 'builtin', 'kissfft', 'fftw', 'vdsp', 'ipp'], + choices: ['auto', 'builtin', 'kissfft', 'fftw', 'sleef', 'vdsp', 'ipp'], value: 'auto', description: 'FFT library to use. The default (auto) will use vDSP if available, the builtin implementation otherwise.') diff --git a/src/common/Allocators.h b/src/common/Allocators.h index 1ac43f6..3c5bba2 100644 --- a/src/common/Allocators.h +++ b/src/common/Allocators.h @@ -85,9 +85,9 @@ T *allocate(size_t count) #else /* !MALLOC_IS_ALIGNED */ // That's the "sufficiently aligned" functions dealt with, the - // rest need a specific alignment provided to the call. 32-byte - // alignment is required for at least OpenMAX - static const int alignment = 32; + // rest need a specific alignment provided to the call. 64-byte + // alignment is enough for 8x8 double operations + static const int alignment = 64; #ifdef HAVE__ALIGNED_MALLOC ptr = _aligned_malloc(count * sizeof(T), alignment); diff --git a/src/common/FFT.cpp b/src/common/FFT.cpp index 984901f..66519d2 100644 --- a/src/common/FFT.cpp +++ b/src/common/FFT.cpp @@ -53,6 +53,13 @@ #include #endif +#ifdef HAVE_SLEEF +extern "C" { +#include +#include +} +#endif + #ifdef HAVE_VDSP #include #endif @@ -63,6 +70,7 @@ #ifndef HAVE_IPP #ifndef HAVE_FFTW3 +#ifndef HAVE_SLEEF #ifndef HAVE_KISSFFT #ifndef USE_BUILTIN_FFT #ifndef HAVE_VDSP @@ -72,6 +80,7 @@ #endif #endif #endif +#endif #include #include @@ -1425,6 +1434,302 @@ pthread_mutex_t D_FFTW::m_commonMutex = PTHREAD_MUTEX_INITIALIZER; #endif /* HAVE_FFTW3 */ +#ifdef HAVE_SLEEF + +class D_SLEEF : public FFTImpl +{ + bool isAligned(const void *ptr) { + return ! 
((uintptr_t)ptr & 63); + } + +public: + D_SLEEF(int size) : + m_fplanf(0), m_fplani(0), m_fbuf(0), m_fpacked(0), + m_dplanf(0), m_dplani(0), m_dbuf(0), m_dpacked(0), + m_size(size) + { + } + + ~D_SLEEF() { + if (m_fplanf) { + SleefDFT_dispose(m_fplanf); + SleefDFT_dispose(m_fplani); + Sleef_free(m_fbuf); + Sleef_free(m_fpacked); + } + if (m_dplanf) { + SleefDFT_dispose(m_dplanf); + SleefDFT_dispose(m_dplani); + Sleef_free(m_dbuf); + Sleef_free(m_dpacked); + } + } + + int getSize() const { + return m_size; + } + + FFT::Precisions + getSupportedPrecisions() const { + return FFT::SinglePrecision | FFT::DoublePrecision; + } + + void initFloat() { + if (m_fplanf) return; + + m_fbuf = static_cast + (Sleef_malloc(m_size * sizeof(float))); + m_fpacked = static_cast + (Sleef_malloc((m_size + 2) * sizeof(float))); + + m_fplanf = SleefDFT_float_init1d + (m_size, m_fbuf, m_fpacked, + SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); + + m_fplani = SleefDFT_float_init1d + (m_size, m_fpacked, m_fbuf, + SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); + } + + void initDouble() { + if (m_dplanf) return; + + m_dbuf = static_cast + (Sleef_malloc(m_size * sizeof(double))); + m_dpacked = static_cast + (Sleef_malloc((m_size + 2) * sizeof(double))); + + m_dplanf = SleefDFT_double_init1d + (m_size, m_dbuf, m_dpacked, + SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); + + m_dplani = SleefDFT_double_init1d + (m_size, m_dpacked, m_dbuf, + SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); + } + + void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) { + const float *src[2] = { re, im }; + v_interleave(m_fpacked, src, 2, m_size/2 + 1); + } + + void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) { + const double *src[2] = { re, im }; + v_interleave(m_dpacked, src, 2, m_size/2 + 1); + } + + void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) { + float *dst[2] = { re, im }; + v_deinterleave(dst, m_fpacked, 2, m_size/2 + 1); 
+ } + + void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) { + double *dst[2] = { re, im }; + v_deinterleave(dst, m_dpacked, 2, m_size/2 + 1); + } + + void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) { + if (!m_dplanf) initDouble(); + if (isAligned(realIn)) { + SleefDFT_double_execute(m_dplanf, realIn, 0); + } else { + v_copy(m_dbuf, realIn, m_size); + SleefDFT_double_execute(m_dplanf, 0, 0); + } + unpackDouble(realOut, imagOut); + } + + void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) { + if (!m_dplanf) initDouble(); + if (isAligned(realIn) && isAligned(complexOut)) { + SleefDFT_double_execute(m_dplanf, realIn, complexOut); + } else { + v_copy(m_dbuf, realIn, m_size); + SleefDFT_double_execute(m_dplanf, 0, 0); + v_copy(complexOut, m_dpacked, m_size + 2); + } + } + + void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) { + if (!m_dplanf) initDouble(); + if (isAligned(realIn)) { + SleefDFT_double_execute(m_dplanf, realIn, 0); + } else { + v_copy(m_dbuf, realIn, m_size); + SleefDFT_double_execute(m_dplanf, 0, 0); + } + v_cartesian_interleaved_to_polar(magOut, phaseOut, m_dpacked, m_size/2+1); + } + + void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) { + if (!m_dplanf) initDouble(); + if (isAligned(realIn)) { + SleefDFT_double_execute(m_dplanf, realIn, 0); + } else { + v_copy(m_dbuf, realIn, m_size); + SleefDFT_double_execute(m_dplanf, 0, 0); + } + v_cartesian_interleaved_to_magnitudes(magOut, m_dpacked, m_size/2+1); + } + + void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) { + if (!m_fplanf) initFloat(); + if (isAligned(realIn)) { + SleefDFT_float_execute(m_fplanf, realIn, 0); + } else { + v_copy(m_fbuf, realIn, m_size); + SleefDFT_float_execute(m_fplanf, 0, 0); + } + unpackFloat(realOut, imagOut); + } + + void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) { + if 
(!m_fplanf) initFloat(); + if (isAligned(realIn) && isAligned(complexOut)) { + SleefDFT_float_execute(m_fplanf, realIn, complexOut); + } else { + v_copy(m_fbuf, realIn, m_size); + SleefDFT_float_execute(m_fplanf, 0, 0); + v_copy(complexOut, m_fpacked, m_size + 2); + } + } + + void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) { + if (!m_fplanf) initFloat(); + if (isAligned(realIn)) { + SleefDFT_float_execute(m_fplanf, realIn, 0); + } else { + v_copy(m_fbuf, realIn, m_size); + SleefDFT_float_execute(m_fplanf, 0, 0); + } + v_cartesian_interleaved_to_polar(magOut, phaseOut, m_fpacked, m_size/2+1); + } + + void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) { + if (!m_fplanf) initFloat(); + if (isAligned(realIn)) { + SleefDFT_float_execute(m_fplanf, realIn, 0); + } else { + v_copy(m_fbuf, realIn, m_size); + SleefDFT_float_execute(m_fplanf, 0, 0); + } + v_cartesian_interleaved_to_magnitudes(magOut, m_fpacked, m_size/2+1); + } + + void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) { + if (!m_dplanf) initDouble(); + packDouble(realIn, imagIn); + if (isAligned(realOut)) { + SleefDFT_double_execute(m_dplani, 0, realOut); + } else { + SleefDFT_double_execute(m_dplani, 0, 0); + v_copy(realOut, m_dbuf, m_size); + } + } + + void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) { + if (!m_dplanf) initDouble(); + if (isAligned(complexIn) && isAligned(realOut)) { + SleefDFT_double_execute(m_dplani, complexIn, realOut); + } else { + v_copy(m_dpacked, complexIn, m_size + 2); + SleefDFT_double_execute(m_dplani, 0, 0); + v_copy(realOut, m_dbuf, m_size); + } + } + + void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) { + if (!m_dplanf) initDouble(); + v_polar_to_cartesian_interleaved(m_dpacked, magIn, phaseIn, m_size/2+1); + if (isAligned(realOut)) { + SleefDFT_double_execute(m_dplani, 0, realOut); + } else { 
+ SleefDFT_double_execute(m_dplani, 0, 0); + v_copy(realOut, m_dbuf, m_size); + } + } + + void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) { + if (!m_dplanf) initDouble(); + const int hs = m_size/2; + for (int i = 0; i <= hs; ++i) { + m_dpacked[i*2] = log(magIn[i] + 0.000001); + m_dpacked[i*2+1] = 0.0; + } + if (isAligned(cepOut)) { + SleefDFT_double_execute(m_dplani, 0, cepOut); + } else { + SleefDFT_double_execute(m_dplani, 0, 0); + v_copy(cepOut, m_dbuf, m_size); + } + } + + void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) { + if (!m_fplanf) initFloat(); + packFloat(realIn, imagIn); + if (isAligned(realOut)) { + SleefDFT_float_execute(m_fplani, 0, realOut); + } else { + SleefDFT_float_execute(m_fplani, 0, 0); + v_copy(realOut, m_fbuf, m_size); + } + } + + void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) { + if (!m_fplanf) initFloat(); + if (isAligned(complexIn) && isAligned(realOut)) { + SleefDFT_float_execute(m_fplani, complexIn, realOut); + } else { + v_copy(m_fpacked, complexIn, m_size + 2); + SleefDFT_float_execute(m_fplani, 0, 0); + v_copy(realOut, m_fbuf, m_size); + } + } + + void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) { + if (!m_fplanf) initFloat(); + v_polar_to_cartesian_interleaved(m_fpacked, magIn, phaseIn, m_size/2+1); + if (isAligned(realOut)) { + SleefDFT_float_execute(m_fplani, 0, realOut); + } else { + SleefDFT_float_execute(m_fplani, 0, 0); + v_copy(realOut, m_fbuf, m_size); + } + } + + void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) { + if (!m_fplanf) initFloat(); + const int hs = m_size/2; + for (int i = 0; i <= hs; ++i) { + m_fpacked[i*2] = logf(magIn[i] + 0.000001f); + m_fpacked[i*2+1] = 0.0; + } + if (isAligned(cepOut)) { + SleefDFT_float_execute(m_fplani, 0, cepOut); + } else { + SleefDFT_float_execute(m_fplani, 0, 0); + v_copy(cepOut, m_fbuf, m_size); + } + } + +private: + 
SleefDFT *m_fplanf; + SleefDFT *m_fplani; + + float *m_fbuf; + float *m_fpacked; + + SleefDFT *m_dplanf; + SleefDFT *m_dplani; + + double *m_dbuf; + double *m_dpacked; + + const int m_size; +}; + +#endif /* HAVE_SLEEF */ + #ifdef HAVE_KISSFFT class D_KISSFFT : public FFTImpl @@ -2266,6 +2571,9 @@ getImplementationDetails() #ifdef HAVE_FFTW3 impls["fftw"] = SizeConstraintNone; #endif +#ifdef HAVE_SLEEF + impls["sleef"] = SizeConstraintEvenPowerOfTwo; +#endif #ifdef HAVE_KISSFFT impls["kissfft"] = SizeConstraintEven; #endif @@ -2310,7 +2618,7 @@ pickImplementation(int size) } std::string preference[] = { - "ipp", "vdsp", "fftw", "builtin", "kissfft" + "ipp", "vdsp", "sleef", "fftw", "builtin", "kissfft" }; for (int i = 0; i < int(sizeof(preference)/sizeof(preference[0])); ++i) { @@ -2390,6 +2698,10 @@ FFT::FFT(int size, int debugLevel) : } else if (impl == "fftw") { #ifdef HAVE_FFTW3 d = new FFTs::D_FFTW(size); +#endif + } else if (impl == "sleef") { +#ifdef HAVE_SLEEF + d = new FFTs::D_SLEEF(size); #endif } else if (impl == "kissfft") { #ifdef HAVE_KISSFFT @@ -2650,6 +2962,14 @@ FFT::tune() candidates["fftw"] = d; #endif +#ifdef HAVE_SLEEF + os << "Constructing new SLEEF FFT object for size " << size << "..." << std::endl; + d = new FFTs::D_SLEEF(size); + d->initFloat(); + d->initDouble(); + candidates["sleef"] = d; +#endif + #ifdef HAVE_KISSFFT os << "Constructing new KISSFFT object for size " << size << "..." << std::endl; d = new FFTs::D_KISSFFT(size); From 147e88ad5572d3bb07f4d417a430e774ea086121 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 9 Aug 2022 16:44:56 +0100 Subject: [PATCH 24/41] Add SLEEF to README. I was going to add it to the CI, but it looks as if Ubuntu's libsleef-dev doesn't include sleefdft. 
--- .build.yml | 4 ++++ COMPILING.md | 7 +++++++ README.md | 1 + 3 files changed, 12 insertions(+) diff --git a/.build.yml b/.build.yml index 52785d3..de6b8f0 100644 --- a/.build.yml +++ b/.build.yml @@ -4,6 +4,7 @@ packages: - libsamplerate0-dev - libsndfile1-dev - libfftw3-dev +# - libsleef-dev - ladspa-sdk - lv2-dev - vamp-plugin-sdk @@ -19,6 +20,7 @@ tasks: meson build_speex -Dresampler=speex meson build_libsamplerate -Dresampler=libsamplerate meson build_fftw -Dfft=fftw +# meson build_sleef -Dfft=sleef meson build_kissfft -Dfft=kissfft - build: | cd rubberband @@ -33,6 +35,8 @@ tasks: build_libsamplerate/rubberband -V ninja -C build_fftw meson test -C build_fftw +# ninja -C build_sleef +# meson test -C build_sleef build_fftw/rubberband -V ninja -C build_kissfft meson test -C build_kissfft diff --git a/COMPILING.md b/COMPILING.md index 16b0f3b..1a923de 100644 --- a/COMPILING.md +++ b/COMPILING.md @@ -256,6 +256,13 @@ FFTW3 -Dfft=fftw -DHAVE_FFTW3 GPL. A bit faster than built-in, a bit slower than Accelerate. +SLEEF -Dfft=sleef -DHAVE_SLEEF Usually very fast. Not as widely + distributed as FFTW3. Requires + both libsleef and libsleefdft. + Can be distributed with either + the Rubber Band GPL or + commercial licence. + KissFFT -Dfft=kissfft -DHAVE_KISSFFT Single precision. Only indicated for use with diff --git a/README.md b/README.md index fd57943..b3d69bc 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ details. 
* FFTW3 - GPL; proprietary licence needed for redistribution * Intel IPP - Proprietary; licence needed for redistribution + * SLEEF - BSD-like * KissFFT - BSD-like * libsamplerate - BSD-like from version 0.1.9 onwards * Speex - BSD-like From 1d7a684e1a47be782a472a8e2e993ca35a151652 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 9 Aug 2022 16:46:26 +0100 Subject: [PATCH 25/41] Looks like that was not the way to comment something out --- .build.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.build.yml b/.build.yml index de6b8f0..52785d3 100644 --- a/.build.yml +++ b/.build.yml @@ -4,7 +4,6 @@ packages: - libsamplerate0-dev - libsndfile1-dev - libfftw3-dev -# - libsleef-dev - ladspa-sdk - lv2-dev - vamp-plugin-sdk @@ -20,7 +19,6 @@ tasks: meson build_speex -Dresampler=speex meson build_libsamplerate -Dresampler=libsamplerate meson build_fftw -Dfft=fftw -# meson build_sleef -Dfft=sleef meson build_kissfft -Dfft=kissfft - build: | cd rubberband @@ -35,8 +33,6 @@ tasks: build_libsamplerate/rubberband -V ninja -C build_fftw meson test -C build_fftw -# ninja -C build_sleef -# meson test -C build_sleef build_fftw/rubberband -V ninja -C build_kissfft meson test -C build_kissfft From 515f5e2bd1455efd309c2efa4d6bef359c348a42 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 18 Aug 2022 12:03:29 +0100 Subject: [PATCH 26/41] Allow pre-configuration if explicitly specified --- single/RubberBandSingle.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/single/RubberBandSingle.cpp b/single/RubberBandSingle.cpp index 91d78d0..8cdcb14 100644 --- a/single/RubberBandSingle.cpp +++ b/single/RubberBandSingle.cpp @@ -44,6 +44,8 @@ standalone library. 
*/ +#ifndef ALREADY_CONFIGURED + #define USE_BQRESAMPLER 1 #define NO_TIMING 1 @@ -56,6 +58,8 @@ #define USE_BUILTIN_FFT 1 #endif +#endif + #include "../src/faster/AudioCurveCalculator.cpp" #include "../src/faster/CompoundAudioCurve.cpp" #include "../src/faster/HighFrequencyAudioCurve.cpp" From c9565266f2daf3ea54169e73a9a6d9fd24564c57 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 18 Aug 2022 14:27:57 +0100 Subject: [PATCH 27/41] Adjust the limits, with performance in mind - to be auditioned --- src/finer/R3Stretcher.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index b3032ae..daf9090 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -117,9 +117,10 @@ protected: maxInhop(1024) { if (options & RubberBandStretcher::OptionWindowShort) { // See note in calculateHop + minPreferredOuthop = 256; maxPreferredOuthop = 640; maxInhopWithReadahead = 512; - maxInhop = 1024; + maxInhop = 1560; } } }; From e58c6018c57830c0ec6967a87dc8a4d84ae19ce6 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 18 Aug 2022 15:24:24 +0100 Subject: [PATCH 28/41] This appears benign and should be slightly quicker --- src/finer/R3Stretcher.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index ca48abe..dc85c8d 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -92,6 +92,10 @@ R3Stretcher::R3Stretcher(Parameters parameters, BinClassifier::Parameters classifierParameters (classificationBins, 9, 1, 10, 2.0, 2.0); + if (isSingleWindowed()) { + classifierParameters.horizontalFilterLength = 7; + } + int inRingBufferSize = getWindowSourceSize() * 2; int outRingBufferSize = getWindowSourceSize() * 16; From 53eca719fb47c99f8eaaa6dacbf0ac74351589d6 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 18 Aug 2022 16:41:43 +0100 Subject: [PATCH 29/41] Apply gradual phase-reset on unity in the R2 stretcher (R3 already 
does this) --- src/faster/StretcherChannelData.cpp | 2 ++ src/faster/StretcherChannelData.h | 1 + src/faster/StretcherProcess.cpp | 21 +++++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/faster/StretcherChannelData.cpp b/src/faster/StretcherChannelData.cpp index 376b76c..483c572 100644 --- a/src/faster/StretcherChannelData.cpp +++ b/src/faster/StretcherChannelData.cpp @@ -87,6 +87,8 @@ R2Stretcher::ChannelData::construct(const std::set &sizes, interpolator = allocate_and_zero(maxSize); interpolatorScale = 0; + unityResetLow = 16000.f; + for (std::set::const_iterator i = sizes.begin(); i != sizes.end(); ++i) { ffts[*i] = new FFT(*i); diff --git a/src/faster/StretcherChannelData.h b/src/faster/StretcherChannelData.h index 1b2d673..b0ce210 100644 --- a/src/faster/StretcherChannelData.h +++ b/src/faster/StretcherChannelData.h @@ -113,6 +113,7 @@ public: float *ms; // only used when mid-side processing float *interpolator; // only used when time-domain smoothing is on int interpolatorScale; + float unityResetLow; // for gradual phase-reset on unity ratio float *fltbuf; process_t *dblbuf; // owned by FFT object, only used for time domain FFT i/o diff --git a/src/faster/StretcherProcess.cpp b/src/faster/StretcherProcess.cpp index 5074a93..4757d5e 100644 --- a/src/faster/StretcherProcess.cpp +++ b/src/faster/StretcherProcess.cpp @@ -744,12 +744,29 @@ R2Stretcher::modifyChunk(size_t channel, int bandlow = lrint((150 * m_fftSize) / rate); int bandhigh = lrint((1000 * m_fftSize) / rate); + float r = getEffectiveRatio(); + + bool unity = (fabsf(r - 1.f) < 1.e-6f); + if (unity) { + if (!phaseReset) { + phaseReset = true; + bandlimited = true; + bandlow = lrint((cd.unityResetLow * m_fftSize) / rate); + bandhigh = count; + if (bandlow > 0) { + m_log.log(2, "unity: bandlow & high", bandlow, bandhigh); + } + } + cd.unityResetLow *= 0.9f; + } else { + cd.unityResetLow = 16000.f; + } + float freq0 = m_freq0; float freq1 = m_freq1; float freq2 = 
m_freq2; - + if (laminar) { - float r = getEffectiveRatio(); if (r > 1) { float rf0 = 600 + (600 * ((r-1)*(r-1)*(r-1)*2)); float f1ratio = freq1 / freq0; From 4336d47a02664876b7a0d533d67cff4e20f8722a Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 1 Sep 2022 10:41:25 +0100 Subject: [PATCH 30/41] With longer process sizes we can end up with insufficient space in the inbuf, but enough in the outbuf following the previous retrieve call for a consume to be possible before writing to the inbuf --- main/main.cpp | 1 + src/finer/R3Stretcher.cpp | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/main/main.cpp b/main/main.cpp index fcb5920..2bf748d 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -673,6 +673,7 @@ int main(int argc, char **argv) RubberBandStretcher ts(sfinfo.samplerate, channels, options, ratio, frequencyshift); ts.setExpectedInputDuration(sfinfo.frames); + ts.setMaxProcessSize(bs); int frame = 0; int percent = 0; diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index dc85c8d..fd719c1 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -647,6 +647,11 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) } size_t ws = m_channelData[0]->inbuf->getWriteSpace(); + if (samples > ws) { + m_log.log(2, "R3Stretcher::process: insufficient space in input buffer, attempting consume before write"); + consume(); + ws = m_channelData[0]->inbuf->getWriteSpace(); + } if (samples > ws) { m_log.log(0, "R3Stretcher::process: WARNING: Forced to increase input buffer size. Either setMaxProcessSize was not properly called or process is being called repeatedly without retrieve. 
Write space and samples", ws, samples); size_t newSize = m_channelData[0]->inbuf->getSize() - ws + samples; From e0a6fc686d0e6e6398272b8d3be2cb3a7fa0ffb0 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 11:10:44 +0100 Subject: [PATCH 31/41] Add support for external libspeexdsp --- meson.build | 46 ++++++++++++++++++++++++++-------------- meson_options.txt | 2 +- src/common/Resampler.cpp | 19 +++++++++++++++-- 3 files changed, 48 insertions(+), 19 deletions(-) diff --git a/meson.build b/meson.build index 42f7f7f..400aaec 100644 --- a/meson.build +++ b/meson.build @@ -113,6 +113,7 @@ endforeach fftw3_dep = dependency('fftw3', version: '>= 3.0.0', required: false) samplerate_dep = dependency('samplerate', version: '>= 0.1.8', required: false) +speexdsp_dep = dependency('speexdsp', version: '>= 1.0.0', required: false) sndfile_dep = dependency('sndfile', version: '>= 1.0.16', required: false) vamp_dep = dependency('vamp-sdk', version: '>= 2.9', required: false) boost_unit_test_dep = dependency('boost', modules: ['unit_test_framework'], version: '>= 1.73', required: false) @@ -177,17 +178,16 @@ elif fft == 'kissfft' general_include_dirs += 'src/ext/kissfft' elif fft == 'fftw' - if fftw3_dep.found() - config_summary += { 'FFT': 'FFTW' } - message('For FFT: using FFTW') - pkgconfig_requirements += fftw3_dep - else - fftw_dep = cpp.find_library('fftw3', - dirs: get_option('extra_lib_dirs'), - has_headers: ['fftw3.h'], - header_args: extra_include_args, - required: true) + if not fftw3_dep.found() + fftw3_dep = cpp.find_library('fftw3', + dirs: get_option('extra_lib_dirs'), + has_headers: ['fftw3.h'], + header_args: extra_include_args, + required: true) endif + config_summary += { 'FFT': 'FFTW' } + message('For FFT: using FFTW') + pkgconfig_requirements += fftw3_dep feature_dependencies += fftw3_dep feature_defines += ['-DHAVE_FFTW3', '-DFFTW_DOUBLE_ONLY'] @@ -223,27 +223,41 @@ if resampler == 'builtin' feature_defines += ['-DUSE_BQRESAMPLER'] elif 
resampler == 'libsamplerate' - if samplerate_dep.found() - config_summary += { 'Resampler': 'libsamplerate' } - message('For resampler: using libsamplerate') - pkgconfig_requirements += samplerate_dep - else + if not samplerate_dep.found() samplerate_dep = cpp.find_library('samplerate', dirs: get_option('extra_lib_dirs'), has_headers: ['samplerate.h'], header_args: extra_include_args, required: true) endif + config_summary += { 'Resampler': 'libsamplerate' } + message('For resampler: using libsamplerate') feature_dependencies += samplerate_dep + pkgconfig_requirements += samplerate_dep feature_defines += ['-DHAVE_LIBSAMPLERATE'] elif resampler == 'speex' config_summary += { 'Resampler': 'Speex' } - message('For resampler: using Speex') + message('For resampler: using bundled Speex') message('(consider libsamplerate if time-varying pitch shift is required)') feature_sources += ['src/ext/speex/resample.c'] feature_defines += ['-DUSE_SPEEX'] +elif resampler == 'libspeexdsp' + if not speexdsp_dep.found() + speexdsp_dep = cpp.find_library('speexdsp', + dirs: get_option('extra_lib_dirs'), + has_headers: ['speex/speex_resampler.h'], + header_args: extra_include_args, + required: true) + endif + config_summary += { 'Resampler': 'libspeexdsp' } + message('For resampler: using Speex DSP library') + message('(consider libsamplerate if time-varying pitch shift is required)') + feature_dependencies += speexdsp_dep + pkgconfig_requirements += speexdsp_dep + feature_defines += ['-DHAVE_LIBSPEEXDSP'] + elif resampler == 'ipp' if ipp_path != '' config_summary += { 'Resampler': 'Intel IPP' } diff --git a/meson_options.txt b/meson_options.txt index c820c20..3ca090c 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -7,7 +7,7 @@ option('fft', option('resampler', type: 'combo', - choices: ['auto', 'builtin', 'libsamplerate', 'speex', 'ipp'], + choices: ['auto', 'builtin', 'libsamplerate', 'speex', 'libspeexdsp', 'ipp'], value: 'auto', description: 'Resampler library to use. 
The default (auto) simply uses the builtin implementation.') diff --git a/src/common/Resampler.cpp b/src/common/Resampler.cpp index 32444a7..df46471 100644 --- a/src/common/Resampler.cpp +++ b/src/common/Resampler.cpp @@ -55,6 +55,10 @@ #ifdef USE_SPEEX #include "../ext/speex/speex_resampler.h" +#else +#ifdef HAVE_LIBSPEEXDSP +#include +#endif #endif #ifdef USE_BQRESAMPLER @@ -64,6 +68,7 @@ #ifndef HAVE_IPP #ifndef HAVE_LIBSAMPLERATE #ifndef HAVE_LIBRESAMPLE +#ifndef HAVE_LIBSPEEXDSP #ifndef USE_SPEEX #ifndef USE_BQRESAMPLER #error No resampler implementation selected! @@ -72,6 +77,7 @@ #endif #endif #endif +#endif #define BQ_R__ R__ @@ -1106,7 +1112,7 @@ D_BQResampler::reset() #endif /* USE_BQRESAMPLER */ -#ifdef USE_SPEEX +#if defined(USE_SPEEX) || defined(HAVE_LIBSPEEXDSP) class D_Speex : public Resampler::Impl { @@ -1404,6 +1410,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels) #ifdef USE_SPEEX m_method = 2; #endif +#ifdef HAVE_LIBSPEEXDSP + m_method = 2; +#endif #ifdef HAVE_LIBRESAMPLE m_method = 3; #endif @@ -1425,6 +1434,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels) #ifdef USE_SPEEX m_method = 2; #endif +#ifdef HAVE_LIBSPEEXDSP + m_method = 2; +#endif #ifdef USE_BQRESAMPLER m_method = 4; #endif @@ -1443,6 +1455,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels) #ifdef USE_SPEEX m_method = 2; #endif +#ifdef HAVE_LIBSPEEXDSP + m_method = 2; +#endif #ifdef USE_BQRESAMPLER m_method = 4; #endif @@ -1483,7 +1498,7 @@ Resampler::Resampler(Resampler::Parameters params, int channels) break; case 2: -#ifdef USE_SPEEX +#if defined(USE_SPEEX) || defined(HAVE_LIBSPEEXDSP) d = new Resamplers::D_Speex (params.quality, params.ratioChange, channels, From a525654d6a48370635380bd5502910fdfe1d02f4 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 12:06:11 +0100 Subject: [PATCH 32/41] Pull out nearest-rational into mathmisc so it can be used in other places than BQResampler --- src/common/mathmisc.cpp 
| 69 +++++++++++++++++++++++++++++++++++++++++ src/common/mathmisc.h | 2 ++ 2 files changed, 71 insertions(+) create mode 100644 src/common/mathmisc.cpp diff --git a/src/common/mathmisc.cpp b/src/common/mathmisc.cpp new file mode 100644 index 0000000..cfe2b1a --- /dev/null +++ b/src/common/mathmisc.cpp @@ -0,0 +1,69 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Rubber Band Library + An audio time-stretching and pitch-shifting library. + Copyright 2007-2022 Particular Programs Ltd. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. + + Alternatively, if you have a valid commercial licence for the + Rubber Band Library obtained by agreement with the copyright + holders, you may redistribute and/or modify it under the terms + described in that licence. + + If you wish to distribute code using the Rubber Band Library + under terms other than those of the GNU General Public License, + you must obtain a valid commercial licence before doing so. 
+*/ + +#include "mathmisc.h" + +namespace RubberBand { + +void pickNearestRational(double ratio, int max_denom, int &num, int &denom) +{ + // Farey algorithm, see + // https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/ + double a = 0.0, b = 1.0, c = 1.0, d = 0.0; + double pa = a, pb = b, pc = c, pd = d; + double eps = 1e-9; + while (b <= max_denom && d <= max_denom) { + double mediant = (a + c) / (b + d); + if (fabs(ratio - mediant) < eps) { + if (b + d <= max_denom) { + num = a + c; + denom = b + d; + return; + } else if (d > b) { + num = c; + denom = d; + return; + } else { + num = a; + denom = b; + return; + } + } + if (ratio > mediant) { + pa = a; pb = b; + a += c; b += d; + } else { + pc = c; pd = d; + c += a; d += b; + } + } + if (fabs(ratio - (pc / pd)) < fabs(ratio - (pa / pb))) { + num = pc; + denom = pd; + } else { + num = pa; + denom = pb; + } +} + +} diff --git a/src/common/mathmisc.h b/src/common/mathmisc.h index e6697ff..3928b8e 100644 --- a/src/common/mathmisc.h +++ b/src/common/mathmisc.h @@ -51,6 +51,8 @@ inline double frequencyForBin(int b, int fftSize, double sampleRate) { return (double(b) * sampleRate) / double(fftSize); } +void pickNearestRational(double ratio, int maxDenom, int &num, int &denom); + } #endif From 2025c51d643130cdec51f084116e20905bc98de5 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 12:06:45 +0100 Subject: [PATCH 33/41] Use extracted pickNearestRational; ensure the max rational is actually the max for both num and denom --- src/common/BQResampler.cpp | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/src/common/BQResampler.cpp b/src/common/BQResampler.cpp index 384e159..af7bb1e 100644 --- a/src/common/BQResampler.cpp +++ b/src/common/BQResampler.cpp @@ -31,6 +31,8 @@ #include "Allocators.h" #include "VectorOps.h" +#include "mathmisc.h" + #define BQ_R__ R__ using std::vector; @@ -376,41 +378,18 @@ BQResampler::fill_params(double ratio, 
double numd, double denomd) const BQResampler::params BQResampler::pick_params(double ratio) const { - // Farey algorithm, see - // https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/ int max_denom; if (m_dynamism == RatioMostlyFixed) { max_denom = 192000; } else { max_denom = m_qparams.rational_max; - } - double a = 0.0, b = 1.0, c = 1.0, d = 0.0; - double pa = a, pb = b, pc = c, pd = d; - double eps = 1e-9; - while (b <= max_denom && d <= max_denom) { - double mediant = (a + c) / (b + d); - if (fabs(ratio - mediant) < eps) { - if (b + d <= max_denom) { - return fill_params(ratio, a + c, b + d); - } else if (d > b) { - return fill_params(ratio, c, d); - } else { - return fill_params(ratio, a, b); - } - } - if (ratio > mediant) { - pa = a; pb = b; - a += c; b += d; - } else { - pc = c; pd = d; - c += a; d += b; + if (ratio > 1.0) { + max_denom = int(ceil(max_denom / ratio)); } } - if (fabs(ratio - (pc / pd)) < fabs(ratio - (pa / pb))) { - return fill_params(ratio, pc, pd); - } else { - return fill_params(ratio, pa, pb); - } + int num, denom; + pickNearestRational(ratio, max_denom, num, denom); + return fill_params(ratio, num, denom); } void From aa1865d22b5966dacf352495ed7b233d75da0d6d Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 12:07:29 +0100 Subject: [PATCH 34/41] Add mathmisc.cpp --- meson.build | 1 + single/RubberBandSingle.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/meson.build b/meson.build index 99006d0..213084c 100644 --- a/meson.build +++ b/meson.build @@ -49,6 +49,7 @@ library_sources = [ 'src/common/Resampler.cpp', 'src/common/StretchCalculator.cpp', 'src/common/sysutils.cpp', + 'src/common/mathmisc.cpp', 'src/common/Thread.cpp', 'src/finer/R3Stretcher.cpp', ] diff --git a/single/RubberBandSingle.cpp b/single/RubberBandSingle.cpp index 8cdcb14..d2f6138 100644 --- a/single/RubberBandSingle.cpp +++ b/single/RubberBandSingle.cpp @@ -73,6 +73,7 @@ #include "../src/common/Allocators.cpp" #include 
"../src/common/StretchCalculator.cpp" #include "../src/common/sysutils.cpp" +#include "../src/common/mathmisc.cpp" #include "../src/common/Thread.cpp" #include "../src/faster/StretcherChannelData.cpp" #include "../src/faster/R2Stretcher.cpp" From 8c817265d5afb1b7a1710d2d44f14c548d137392 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 12:07:41 +0100 Subject: [PATCH 35/41] Pick more rational rational values --- src/common/Resampler.cpp | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/common/Resampler.cpp b/src/common/Resampler.cpp index df46471..d256820 100644 --- a/src/common/Resampler.cpp +++ b/src/common/Resampler.cpp @@ -26,6 +26,8 @@ #include "Allocators.h" #include "VectorOps.h" +#include "mathmisc.h" + #include #include @@ -1220,18 +1222,22 @@ D_Speex::setRatio(double ratio) // Speex wants a ratio of two unsigned integers, not a single // float. Let's do that. - unsigned int big = 272408136U; - unsigned int denom = 1, num = 1; - - if (ratio < 1.f) { - denom = big; - double dnum = double(big) * double(ratio); - num = (unsigned int)dnum; - } else if (ratio > 1.f) { - num = big; - double ddenom = double(big) / double(ratio); - denom = (unsigned int)ddenom; + int max_denom = 96000; + if (ratio > 1.0) { + max_denom = int(ceil(96000 / ratio)); } + + int inum, idenom; + pickNearestRational(ratio, max_denom, inum, idenom); + + if (inum < 0 || idenom < 0) { + cerr << "Resampler::setRatio: Internal error: " + << "numerator or denominator < 0 (" + << inum << "/" << idenom << ")" << endl; + return; + } + + unsigned int num = inum, denom = idenom; if (m_debugLevel > 1) { cerr << "D_Speex: Desired ratio " << ratio << ", requesting ratio " @@ -1246,8 +1252,12 @@ D_Speex::setRatio(double ratio) (m_resampler, denom, num, fromRate, toRate); if (err) { - cerr << "Resampler::Resampler: failed to set rate on Speex resampler" - << endl; + cerr << "Resampler::Resampler: failed to set rate on Speex 
resampler" + << " (with ratio = " << ratio << " [ratio-1 = " << ratio - 1.0 + << "], denom = " << denom + << ", num = " << num << ", fromRate = " << fromRate + << ", toRate = " << toRate << ", err = " << err + << ")" << endl; #ifndef NO_EXCEPTIONS throw Resampler::ImplementationError; #endif From deb84a5f78bb53ad82c06d3126637de36faab497 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 13:11:15 +0100 Subject: [PATCH 36/41] Update build files --- otherbuilds/Android.mk | 1 + otherbuilds/Makefile.ios | 1 + otherbuilds/Makefile.linux | 1 + otherbuilds/Makefile.macos | 1 + otherbuilds/Makefile.macos-universal | 1 + otherbuilds/rubberband-library.vcxproj | 1 + src/common/mathmisc.h | 2 ++ 7 files changed, 8 insertions(+) diff --git a/otherbuilds/Android.mk b/otherbuilds/Android.mk index f1ef0b2..681a677 100644 --- a/otherbuilds/Android.mk +++ b/otherbuilds/Android.mk @@ -28,6 +28,7 @@ RUBBERBAND_SRC_FILES := \ $(RUBBERBAND_SRC_PATH)/common/Allocators.cpp \ $(RUBBERBAND_SRC_PATH)/common/StretchCalculator.cpp \ $(RUBBERBAND_SRC_PATH)/common/sysutils.cpp \ + $(RUBBERBAND_SRC_PATH)/common/mathmisc.cpp \ $(RUBBERBAND_SRC_PATH)/common/Thread.cpp \ $(RUBBERBAND_SRC_PATH)/finer/R3StretcherImpl.cpp diff --git a/otherbuilds/Makefile.ios b/otherbuilds/Makefile.ios index d1a1977..b7fec88 100644 --- a/otherbuilds/Makefile.ios +++ b/otherbuilds/Makefile.ios @@ -55,6 +55,7 @@ LIBRARY_SOURCES := \ src/common/Resampler.cpp \ src/common/StretchCalculator.cpp \ src/common/sysutils.cpp \ + src/common/mathmisc.cpp \ src/common/Thread.cpp \ src/finer/R3Stretcher.cpp diff --git a/otherbuilds/Makefile.linux b/otherbuilds/Makefile.linux index e12cbcd..0d91e3a 100644 --- a/otherbuilds/Makefile.linux +++ b/otherbuilds/Makefile.linux @@ -44,6 +44,7 @@ LIBRARY_SOURCES := \ src/common/Resampler.cpp \ src/common/StretchCalculator.cpp \ src/common/sysutils.cpp \ + src/common/mathmisc.cpp \ src/common/Thread.cpp \ src/finer/R3Stretcher.cpp diff --git a/otherbuilds/Makefile.macos 
b/otherbuilds/Makefile.macos index 5819dbd..9a43be3 100644 --- a/otherbuilds/Makefile.macos +++ b/otherbuilds/Makefile.macos @@ -44,6 +44,7 @@ LIBRARY_SOURCES := \ src/common/Resampler.cpp \ src/common/StretchCalculator.cpp \ src/common/sysutils.cpp \ + src/common/mathmisc.cpp \ src/common/Thread.cpp \ src/finer/R3Stretcher.cpp diff --git a/otherbuilds/Makefile.macos-universal b/otherbuilds/Makefile.macos-universal index 5513ff7..03f4a0a 100644 --- a/otherbuilds/Makefile.macos-universal +++ b/otherbuilds/Makefile.macos-universal @@ -44,6 +44,7 @@ LIBRARY_SOURCES := \ src/common/Resampler.cpp \ src/common/StretchCalculator.cpp \ src/common/sysutils.cpp \ + src/common/mathmisc.cpp \ src/common/Thread.cpp \ src/finer/R3Stretcher.cpp diff --git a/otherbuilds/rubberband-library.vcxproj b/otherbuilds/rubberband-library.vcxproj index 3f84bb1..816c8be 100644 --- a/otherbuilds/rubberband-library.vcxproj +++ b/otherbuilds/rubberband-library.vcxproj @@ -157,6 +157,7 @@ + diff --git a/src/common/mathmisc.h b/src/common/mathmisc.h index 3928b8e..578d818 100644 --- a/src/common/mathmisc.h +++ b/src/common/mathmisc.h @@ -24,6 +24,8 @@ #ifndef RUBBERBAND_MATHMISC_H #define RUBBERBAND_MATHMISC_H +#include "sysutils.h" + #ifndef M_PI #define M_PI 3.14159265358979323846 #endif // M_PI From 021de9d51ca8bdcd598329e8bd02d1c3541f7718 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 14:49:34 +0100 Subject: [PATCH 37/41] Add resample-before mode --- src/common/Resampler.cpp | 4 +- src/finer/R3Stretcher.cpp | 111 ++++++++++++++++++++++++++++---------- src/finer/R3Stretcher.h | 23 ++++++++ 3 files changed, 107 insertions(+), 31 deletions(-) diff --git a/src/common/Resampler.cpp b/src/common/Resampler.cpp index d256820..ab2de9c 100644 --- a/src/common/Resampler.cpp +++ b/src/common/Resampler.cpp @@ -1222,9 +1222,9 @@ D_Speex::setRatio(double ratio) // Speex wants a ratio of two unsigned integers, not a single // float. Let's do that. 
- int max_denom = 96000; + int max_denom = 48000; if (ratio > 1.0) { - max_denom = int(ceil(96000 / ratio)); + max_denom = int(ceil(48000 / ratio)); } int inum, idenom; diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index fd719c1..6ad9074 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -298,6 +298,20 @@ R3Stretcher::createResampler() m_resampler = std::unique_ptr (new Resampler(resamplerParameters, m_parameters.channels)); + + bool before, after; + areWeResampling(&before, &after); + if (before) { + if (after) { + m_log.log(0, "WARNING: createResampler: we think we are resampling both before and after!"); + } else { + m_log.log(1, "createResampler: resampling before"); + } + } else { + if (after) { + m_log.log(0, "createResampler: resampling after"); + } + } } void @@ -645,24 +659,69 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) } else { m_mode = ProcessMode::Processing; } - - size_t ws = m_channelData[0]->inbuf->getWriteSpace(); - if (samples > ws) { - m_log.log(2, "R3Stretcher::process: insufficient space in input buffer, attempting consume before write"); - consume(); - ws = m_channelData[0]->inbuf->getWriteSpace(); - } - if (samples > ws) { - m_log.log(0, "R3Stretcher::process: WARNING: Forced to increase input buffer size. Either setMaxProcessSize was not properly called or process is being called repeatedly without retrieve. 
Write space and samples", ws, samples); - size_t newSize = m_channelData[0]->inbuf->getSize() - ws + samples; - for (int c = 0; c < m_parameters.channels; ++c) { - auto newBuf = m_channelData[c]->inbuf->resized(newSize); - m_channelData[c]->inbuf = std::unique_ptr>(newBuf); - } - } - for (int c = 0; c < m_parameters.channels; ++c) { - m_channelData[c]->inbuf->write(input[c], samples); + int channels = m_parameters.channels; + int toWrite = int(samples); + + bool resamplingBefore = false; + areWeResampling(&resamplingBefore, nullptr); + + if (resamplingBefore) { + + for (int c = 0; c < channels; ++c) { + auto &cd = m_channelData.at(c); + m_channelAssembly.resampled[c] = cd->resampled.data(); + } + + toWrite = m_resampler->resample + (m_channelAssembly.resampled.data(), + m_channelData.at(0)->resampled.size(), + input, + int(samples), + 1.0 / m_pitchScale, + final); + } + + int written = 0; + + while (written < toWrite) { + + int remaining = toWrite - written; + int ws = m_channelData[0]->inbuf->getWriteSpace(); + + if (ws == 0) { + consume(); + ws = m_channelData[0]->inbuf->getWriteSpace(); + } + + if (ws == 0) { + m_log.log(0, "R3Stretcher::process: WARNING: Forced to increase input buffer size. Either setMaxProcessSize was not properly called, process is being called repeatedly without retrieve, or an internal error has led to an incorrect resampler output calculation. 
Samples to write", toWrite); + size_t newSize = m_channelData[0]->inbuf->getSize() + toWrite; + for (int c = 0; c < m_parameters.channels; ++c) { + auto newBuf = m_channelData[c]->inbuf->resized(newSize); + m_channelData[c]->inbuf = + std::unique_ptr>(newBuf); + } + continue; + } + + int toWriteHere = remaining; + if (toWriteHere > ws) { + toWriteHere = ws; + } + + for (int c = 0; c < m_parameters.channels; ++c) { + if (resamplingBefore) { + m_channelData[c]->inbuf->write + (m_channelData.at(c)->resampled.data() + written, + toWriteHere); + } else { + m_channelData[c]->inbuf->write + (input[c] + written, toWriteHere); + } + } + + written += toWriteHere; } consume(); @@ -708,6 +767,9 @@ R3Stretcher::consume() int channels = m_parameters.channels; int inhop = m_inhop; + bool resamplingAfter = false; + areWeResampling(nullptr, &resamplingAfter); + double effectivePitchRatio = 1.0 / m_pitchScale; if (m_resampler) { effectivePitchRatio = @@ -818,17 +880,8 @@ R3Stretcher::consume() // Resample - bool resampling = false; - if (m_resampler) { - if (m_pitchScale != 1.0 || - (m_parameters.options & - RubberBandStretcher::OptionPitchHighConsistency)) { - resampling = true; - } - } - int resampledCount = 0; - if (resampling) { + if (resamplingAfter) { for (int c = 0; c < channels; ++c) { auto &cd = m_channelData.at(c); m_channelAssembly.mixdown[c] = cd->mixdown.data(); @@ -846,7 +899,7 @@ R3Stretcher::consume() // Emit int writeCount = outhop; - if (resampling) { + if (resamplingAfter) { writeCount = resampledCount; } if (!isRealTime()) { @@ -871,7 +924,7 @@ R3Stretcher::consume() for (int c = 0; c < channels; ++c) { auto &cd = m_channelData.at(c); - if (resampling) { + if (resamplingAfter) { cd->outbuf->write(cd->resampled.data(), writeCount); } else { cd->outbuf->write(cd->mixdown.data(), writeCount); diff --git a/src/finer/R3Stretcher.h b/src/finer/R3Stretcher.h index daf9090..07dde2e 100644 --- a/src/finer/R3Stretcher.h +++ b/src/finer/R3Stretcher.h @@ -398,6 +398,29 @@ 
protected: RubberBandStretcher::OptionProcessRealTime; } + void areWeResampling(bool *before, bool *after) const { + + if (before) *before = false; + if (after) *after = false; + if (!m_resampler) return; + + if (m_parameters.options & + RubberBandStretcher::OptionPitchHighConsistency) { + if (after) *after = true; + + } else if (m_pitchScale != 1.0) { + if (m_pitchScale > 1.0 && + (m_parameters.options & + RubberBandStretcher::OptionPitchHighQuality)) { + if (after) *after = true; + } else if (m_pitchScale < 1.0) { + if (after) *after = true; + } else { + if (before) *before = true; + } + } + } + bool isSingleWindowed() const { return m_parameters.options & RubberBandStretcher::OptionWindowShort; From 92362bdba0d5cb33ed76f0af4bdcf91917837e54 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 15:08:29 +0100 Subject: [PATCH 38/41] Resample in chunks as well, to avoid overrunning resample buffer --- src/finer/R3Stretcher.cpp | 86 ++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/src/finer/R3Stretcher.cpp b/src/finer/R3Stretcher.cpp index 6ad9074..8cdbde2 100644 --- a/src/finer/R3Stretcher.cpp +++ b/src/finer/R3Stretcher.cpp @@ -309,7 +309,7 @@ R3Stretcher::createResampler() } } else { if (after) { - m_log.log(0, "createResampler: resampling after"); + m_log.log(1, "createResampler: resampling after"); } } } @@ -660,33 +660,15 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) m_mode = ProcessMode::Processing; } - int channels = m_parameters.channels; - int toWrite = int(samples); - bool resamplingBefore = false; areWeResampling(&resamplingBefore, nullptr); - - if (resamplingBefore) { - for (int c = 0; c < channels; ++c) { - auto &cd = m_channelData.at(c); - m_channelAssembly.resampled[c] = cd->resampled.data(); - } + int channels = m_parameters.channels; + int inputIx = 0; - toWrite = m_resampler->resample - (m_channelAssembly.resampled.data(), - 
m_channelData.at(0)->resampled.size(), - input, - int(samples), - 1.0 / m_pitchScale, - final); - } - - int written = 0; - - while (written < toWrite) { + while (inputIx < int(samples)) { - int remaining = toWrite - written; + int remaining = int(samples) - inputIx; int ws = m_channelData[0]->inbuf->getWriteSpace(); if (ws == 0) { @@ -695,8 +677,8 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) } if (ws == 0) { - m_log.log(0, "R3Stretcher::process: WARNING: Forced to increase input buffer size. Either setMaxProcessSize was not properly called, process is being called repeatedly without retrieve, or an internal error has led to an incorrect resampler output calculation. Samples to write", toWrite); - size_t newSize = m_channelData[0]->inbuf->getSize() + toWrite; + m_log.log(0, "R3Stretcher::process: WARNING: Forced to increase input buffer size. Either setMaxProcessSize was not properly called, process is being called repeatedly without retrieve, or an internal error has led to an incorrect resampler output calculation. 
Samples to write", remaining); + size_t newSize = m_channelData[0]->inbuf->getSize() + remaining; for (int c = 0; c < m_parameters.channels; ++c) { auto newBuf = m_channelData[c]->inbuf->resized(newSize); m_channelData[c]->inbuf = @@ -704,27 +686,47 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final) } continue; } + + if (resamplingBefore) { - int toWriteHere = remaining; - if (toWriteHere > ws) { - toWriteHere = ws; - } - - for (int c = 0; c < m_parameters.channels; ++c) { - if (resamplingBefore) { - m_channelData[c]->inbuf->write - (m_channelData.at(c)->resampled.data() + written, - toWriteHere); - } else { - m_channelData[c]->inbuf->write - (input[c] + written, toWriteHere); + for (int c = 0; c < channels; ++c) { + auto &cd = m_channelData.at(c); + m_channelAssembly.resampled[c] = cd->resampled.data(); } + + int resampleBufSize = int(m_channelData.at(0)->resampled.size()); + int maxResampleOutput = std::min(ws, resampleBufSize); + + int maxResampleInput = int(floor(maxResampleOutput * m_pitchScale)); + int resampleInput = std::min(remaining, maxResampleInput); + if (resampleInput == 0) resampleInput = 1; + + int resampleOutput = m_resampler->resample + (m_channelAssembly.resampled.data(), + maxResampleOutput, + input, + resampleInput, + 1.0 / m_pitchScale, + final); + + inputIx += resampleInput; + + for (int c = 0; c < m_parameters.channels; ++c) { + m_channelData[c]->inbuf->write + (m_channelData.at(c)->resampled.data(), + resampleOutput); + } + + } else { + int toWrite = std::min(ws, remaining); + for (int c = 0; c < m_parameters.channels; ++c) { + m_channelData[c]->inbuf->write (input[c] + inputIx, toWrite); + } + inputIx += toWrite; } - - written += toWriteHere; + + consume(); } - - consume(); } int From 45e8bef4cf5e6ef7634b9e61d76a6f175052596f Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 15:58:08 +0100 Subject: [PATCH 39/41] Slightly nicer output --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/meson.build b/meson.build index 213084c..cc74f23 100644 --- a/meson.build +++ b/meson.build @@ -286,7 +286,7 @@ elif resampler == 'libspeexdsp' header_args: extra_include_args, required: true) endif - config_summary += { 'Resampler': 'libspeexdsp' } + config_summary += { 'Resampler': 'Speex DSP' } message('For resampler: using Speex DSP library') message('(consider libsamplerate if time-varying pitch shift is required)') feature_dependencies += speexdsp_dep From 8ee381efe6d9a6cd4e4aec4c151d8d44bdce4f48 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 16:20:17 +0100 Subject: [PATCH 40/41] Update docs for library support --- COMPILING.md | 91 ++++++++++++++++++++++++++-------------------------- 1 file changed, 46 insertions(+), 45 deletions(-) diff --git a/COMPILING.md b/COMPILING.md index 1a923de..0cfa42e 100644 --- a/COMPILING.md +++ b/COMPILING.md @@ -239,66 +239,67 @@ resampler or libsamplerate. ### FFT libraries supported +The choice of FFT library makes no difference to output quality, only +to CPU usage. + ``` -Library Build option CPP define Notes ----- ------------ ---------- ----- +Library Build option CPP define Notes +---- ------------ ---------- ----- -Built-in -Dfft=builtin -DUSE_BUILTIN_FFT - Default except on macOS/iOS. - Can be distributed with either - the Rubber Band GPL or - commercial licence. +Built-in -Dfft=builtin -DUSE_BUILTIN_FFT Default except on macOS/iOS. -Accelerate -Dfft=vdsp -DHAVE_VDSP Default on macOS/iOS. - Best option on these platforms. +Accelerate -Dfft=vdsp -DHAVE_VDSP Default on macOS/iOS. + Best option on these platforms. -FFTW3 -Dfft=fftw -DHAVE_FFTW3 GPL. - A bit faster than built-in, - a bit slower than Accelerate. +FFTW3 -Dfft=fftw -DHAVE_FFTW3 A bit faster than built-in, + a bit slower than Accelerate. + GPL licence. -SLEEF -Dfft=sleef -DHAVE_SLEEF Usually very fast. Not as widely - distributed as FFTW3. Requires - both libsleef and libsleefdft. 
- Can be distributed with either - the Rubber Band GPL or - commercial licence. +SLEEF -Dfft=sleef -DHAVE_SLEEF Usually very fast. Not as widely + distributed as FFTW3. Requires + both libsleef and libsleefdft. + BSD-ish licence. -KissFFT -Dfft=kissfft -DHAVE_KISSFFT - Single precision. - Only indicated for use with - single-precision sample type - (see below). - Bundled, can be distributed with - either the Rubber Band GPL or - commercial licence. +KissFFT -Dfft=kissfft -DHAVE_KISSFFT Single precision. + Only advisable when using + single-precision sample type + (see below). + BSD-ish licence. -Intel IPP -Dfft=ipp -DHAVE_IPP Proprietary, can only be used with - Rubber Band commercial licence. +Intel IPP -Dfft=ipp -DHAVE_IPP Very fast on Intel hardware. + Proprietary, can only be used with + Rubber Band commercial licence. ``` ### Resampler libraries supported +The choice of resampler affects both output quality, when +pitch-shifting, and CPU usage. + ``` -Library Build option CPP define Notes ----- ------------ ---------- ----- +Library Build option CPP define Notes +------- ------------ ---------- ----- -Built-in -Dfft=builtin -DUSE_BQRESAMPLER - Default. - Can be distributed with either - the Rubber Band GPL or - commercial licence. Intended to - give best quality for time-varying - pitch shifts in real-time mode. - Newer than, and not as well-tested - as, libsamplerate. +Built-in -Dresampler=builtin -DUSE_BQRESAMPLER Default. + Intended to give high quality + for time-varying pitch shifts + in real-time mode. + Not the fastest option. -libsamplerate -DHAVE_LIBSAMPLERATE - -Dresampler=libsamplerate Good choice in most cases. +libsamplerate -Dresampler=libsamplerate -DHAVE_LIBSAMPLERATE Good choice in most cases. + High quality and usually a bit + faster than the built-in option. + BSD-ish licence. -Speex -DUSE_SPEEX - -Dresampler=speex Can be distributed with - either the Rubber Band GPL or - commercial licence. 
+libspeexdsp -Dresampler=libspeexdsp -DHAVE_LIBSPEEXDSP Very fast. + May not be artifact-free for + time-varying pitch shifts. + BSD-ish licence. + +Bundled Speex -Dresampler=speex -DUSE_SPEEX Older Speex code, bundled for + compatibility with some existing + projects. + Avoid for new projects. ``` ## 8. Other supported #defines From f4115e63facfaaec0e43b1a818df26d3cada1086 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 2 Sep 2022 16:35:35 +0100 Subject: [PATCH 41/41] Update docs --- rubberband/RubberBandStretcher.h | 40 ++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/rubberband/RubberBandStretcher.h b/rubberband/RubberBandStretcher.h index 85bb6a6..0025315 100644 --- a/rubberband/RubberBandStretcher.h +++ b/rubberband/RubberBandStretcher.h @@ -262,26 +262,32 @@ public: * situation where \c OptionThreadingAuto would do so, except omit * the check for multiple CPUs and instead assume it to be true. * - * 7. Flags prefixed \c OptionWindow control the window size for - * FFT processing in the R2 engine. (The window size actually - * used will depend on many factors, but it can be influenced.) - * These options currently have no effect when using the R3 - * engine, but they may do in the future - so code written to use - * R3 now is recommended to use the default. These options may + * 7. Flags prefixed \c OptionWindow influence the window size for + * FFT processing. In the R2 engine these affect the resulting + * sound quality but have relatively little effect on processing + * speed. With the R3 engine they can dramatically affect + * processing speed as well as output quality. These options may * not be changed after construction. * * \li \c OptionWindowStandard - Use the default window size. * The actual size will vary depending on other parameters. * This option is expected to produce better results than the - * other window options in most situations. + * other window options in most situations. 
In the R3 engine + * this causes the engine's full multi-resolution processing + * scheme to be used. * - * \li \c OptionWindowShort - Use a shorter window. This may - * result in crisper sound for audio that depends strongly on - * its timing qualities. + * \li \c OptionWindowShort - Use a shorter window. With the R2 + * engine this may result in crisper sound for audio that + * depends strongly on its timing qualities. With the R3 engine, + * this causes the engine to be restricted to a single window + * size, resulting in both dramatically faster processing and + * lower latency than OptionWindowStandard, but at the expense + * of some sound quality. * - * \li \c OptionWindowLong - Use a longer window. This is - * likely to result in a smoother sound at the expense of - * clarity and timing. + * \li \c OptionWindowLong - Use a longer window. With the R2 + * engine this is likely to result in a smoother sound at the + * expense of clarity and timing. The R3 engine currently + * ignores this option, treating it like OptionWindowStandard. * * 8. Flags prefixed \c OptionSmoothing control the use of * window-presum FFT and time-domain smoothing in the R2 @@ -331,10 +337,10 @@ public: * \li \c OptionPitchHighConsistency - Use a method that * supports dynamic pitch changes without discontinuities, * including when crossing the 1.0 pitch scale. This may cost - * more in CPU than the other two options, especially when the - * pitch scale is exactly 1.0. You should use this option - * whenever you wish to support dynamically changing pitch - * shifts during processing. + * more in CPU than the default, especially when the pitch scale + * is exactly 1.0. You should use this option whenever you wish + * to support dynamically changing pitch shift during + * processing. * * 11. Flags prefixed \c OptionChannels control the method used * for processing two-channel stereo audio. These have different,