From 8fee46b704b1f5f4256a71b11130b9b59bdc6889 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 9 Aug 2022 15:50:02 +0100 Subject: [PATCH] Add SLEEF FFT support --- meson.build | 35 +++++ meson_options.txt | 2 +- src/common/Allocators.h | 6 +- src/common/FFT.cpp | 322 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 360 insertions(+), 5 deletions(-) diff --git a/meson.build b/meson.build index 54951a2..e527bf3 100644 --- a/meson.build +++ b/meson.build @@ -112,6 +112,8 @@ foreach d: get_option('extra_include_dirs') endforeach fftw3_dep = dependency('fftw3', version: '>= 3.0.0', required: false) +sleef_dep = dependency('sleef', version: '>= 3.3.0', required: false) +sleefdft_dep = dependency('sleefdft', version: '>= 3.3.0', required: false) samplerate_dep = dependency('samplerate', version: '>= 0.1.8', required: false) sndfile_dep = dependency('sndfile', version: '>= 1.0.16', required: false) vamp_dep = dependency('vamp-sdk', version: '>= 2.9', required: false) @@ -164,6 +166,9 @@ if fft == 'builtin' if fftw3_dep.found() message('(to use FFTW instead, reconfigure with -Dfft=fftw)') endif + if sleef_dep.found() + message('(to use SLEEF instead, reconfigure with -Dfft=sleef)') + endif feature_defines += ['-DUSE_BUILTIN_FFT'] elif fft == 'kissfft' @@ -172,6 +177,9 @@ elif fft == 'kissfft' if fftw3_dep.found() message('(to use FFTW instead, reconfigure with -Dfft=fftw)') endif + if sleef_dep.found() + message('(to use SLEEF instead, reconfigure with -Dfft=sleef)') + endif feature_sources += ['src/ext/kissfft/kiss_fft.c', 'src/ext/kissfft/kiss_fftr.c'] feature_defines += ['-DHAVE_KISSFFT'] general_include_dirs += 'src/ext/kissfft' @@ -180,6 +188,9 @@ elif fft == 'fftw' if fftw3_dep.found() config_summary += { 'FFT': 'FFTW' } message('For FFT: using FFTW') + if sleef_dep.found() + message('(to use SLEEF instead, reconfigure with -Dfft=sleef)') + endif pkgconfig_requirements += fftw3_dep else fftw_dep = cpp.find_library('fftw3', @@ -187,10 +198,34 @@ elif fft == 'fftw' has_headers: ['fftw3.h'], header_args: extra_include_args, required: true) + config_summary += { 'FFT': 'FFTW' } endif feature_dependencies += fftw3_dep feature_defines += ['-DHAVE_FFTW3', '-DFFTW_DOUBLE_ONLY'] +elif fft == 'sleef' + if sleefdft_dep.found() and sleef_dep.found() + config_summary += { 'FFT': 'SLEEF' } + message('For FFT: using SLEEF') + pkgconfig_requirements += sleefdft_dep + pkgconfig_requirements += sleef_dep + else + sleefdft_dep = cpp.find_library('sleefdft', + dirs: get_option('extra_lib_dirs'), + has_headers: ['sleefdft.h'], + header_args: extra_include_args, + required: true) + sleef_dep = cpp.find_library('sleef', + dirs: get_option('extra_lib_dirs'), + has_headers: ['sleef.h'], + header_args: extra_include_args, + required: true) + config_summary += { 'FFT': 'SLEEF' } + endif + feature_dependencies += sleefdft_dep + feature_dependencies += sleef_dep + feature_defines += ['-DHAVE_SLEEF'] + elif fft == 'vdsp' config_summary += { 'FFT': 'vDSP' } message('For FFT: using vDSP') diff --git a/meson_options.txt b/meson_options.txt index c820c20..62e8337 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,7 +1,7 @@ option('fft', type: 'combo', - choices: ['auto', 'builtin', 'kissfft', 'fftw', 'vdsp', 'ipp'], + choices: ['auto', 'builtin', 'kissfft', 'fftw', 'sleef', 'vdsp', 'ipp'], value: 'auto', description: 'FFT library to use. The default (auto) will use vDSP if available, the builtin implementation otherwise.') diff --git a/src/common/Allocators.h b/src/common/Allocators.h index 1ac43f6..3c5bba2 100644 --- a/src/common/Allocators.h +++ b/src/common/Allocators.h @@ -85,9 +85,9 @@ T *allocate(size_t count) #else /* !MALLOC_IS_ALIGNED */ // That's the "sufficiently aligned" functions dealt with, the - // rest need a specific alignment provided to the call. 32-byte - // alignment is required for at least OpenMAX - static const int alignment = 32; + // rest need a specific alignment provided to the call. 64-byte + // alignment is enough for 8x8 double operations + static const int alignment = 64; #ifdef HAVE__ALIGNED_MALLOC ptr = _aligned_malloc(count * sizeof(T), alignment); diff --git a/src/common/FFT.cpp b/src/common/FFT.cpp index 984901f..66519d2 100644 --- a/src/common/FFT.cpp +++ b/src/common/FFT.cpp @@ -53,6 +53,13 @@ #include #endif +#ifdef HAVE_SLEEF +extern "C" { +#include +#include +} +#endif + #ifdef HAVE_VDSP #include #endif @@ -63,6 +70,7 @@ #ifndef HAVE_IPP #ifndef HAVE_FFTW3 +#ifndef HAVE_SLEEF #ifndef HAVE_KISSFFT #ifndef USE_BUILTIN_FFT #ifndef HAVE_VDSP @@ -72,6 +80,7 @@ #endif #endif #endif +#endif #include #include @@ -1425,6 +1434,302 @@ pthread_mutex_t D_FFTW::m_commonMutex = PTHREAD_MUTEX_INITIALIZER; #endif /* HAVE_FFTW3 */ +#ifdef HAVE_SLEEF + +class D_SLEEF : public FFTImpl +{ + bool isAligned(const void *ptr) { + return ! ((uintptr_t)ptr & 63); + } + +public: + D_SLEEF(int size) : + m_fplanf(0), m_fplani(0), m_fbuf(0), m_fpacked(0), + m_dplanf(0), m_dplani(0), m_dbuf(0), m_dpacked(0), + m_size(size) + { + } + + ~D_SLEEF() { + if (m_fplanf) { + SleefDFT_dispose(m_fplanf); + SleefDFT_dispose(m_fplani); + Sleef_free(m_fbuf); + Sleef_free(m_fpacked); + } + if (m_dplanf) { + SleefDFT_dispose(m_dplanf); + SleefDFT_dispose(m_dplani); + Sleef_free(m_dbuf); + Sleef_free(m_dpacked); + } + } + + int getSize() const { + return m_size; + } + + FFT::Precisions + getSupportedPrecisions() const { + return FFT::SinglePrecision | FFT::DoublePrecision; + } + + void initFloat() { + if (m_fplanf) return; + + m_fbuf = static_cast + (Sleef_malloc(m_size * sizeof(float))); + m_fpacked = static_cast + (Sleef_malloc((m_size + 2) * sizeof(float))); + + m_fplanf = SleefDFT_float_init1d + (m_size, m_fbuf, m_fpacked, + SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); + + m_fplani = SleefDFT_float_init1d + (m_size, m_fpacked, m_fbuf, + SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); + } + + void initDouble() { + if (m_dplanf) return; + + m_dbuf = static_cast + (Sleef_malloc(m_size * sizeof(double))); + m_dpacked = static_cast + (Sleef_malloc((m_size + 2) * sizeof(double))); + + m_dplanf = SleefDFT_double_init1d + (m_size, m_dbuf, m_dpacked, + SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); + + m_dplani = SleefDFT_double_init1d + (m_size, m_dpacked, m_dbuf, + SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE); + } + + void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) { + const float *src[2] = { re, im }; + v_interleave(m_fpacked, src, 2, m_size/2 + 1); + } + + void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) { + const double *src[2] = { re, im }; + v_interleave(m_dpacked, src, 2, m_size/2 + 1); + } + + void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) { + float *dst[2] = { re, im }; + v_deinterleave(dst, m_fpacked, 2, m_size/2 + 1); + } + + void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) { + double *dst[2] = { re, im }; + v_deinterleave(dst, m_dpacked, 2, m_size/2 + 1); + } + + void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) { + if (!m_dplanf) initDouble(); + if (isAligned(realIn)) { + SleefDFT_double_execute(m_dplanf, realIn, 0); + } else { + v_copy(m_dbuf, realIn, m_size); + SleefDFT_double_execute(m_dplanf, 0, 0); + } + unpackDouble(realOut, imagOut); + } + + void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) { + if (!m_dplanf) initDouble(); + if (isAligned(realIn) && isAligned(complexOut)) { + SleefDFT_double_execute(m_dplanf, realIn, complexOut); + } else { + v_copy(m_dbuf, realIn, m_size); + SleefDFT_double_execute(m_dplanf, 0, 0); + v_copy(complexOut, m_dpacked, m_size + 2); + } + } + + void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) { + if (!m_dplanf) initDouble(); + if (isAligned(realIn)) { + SleefDFT_double_execute(m_dplanf, realIn, 0); + } else { + v_copy(m_dbuf, realIn, m_size); + SleefDFT_double_execute(m_dplanf, 0, 0); + } + v_cartesian_interleaved_to_polar(magOut, phaseOut, m_dpacked, m_size/2+1); + } + + void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) { + if (!m_dplanf) initDouble(); + if (isAligned(realIn)) { + SleefDFT_double_execute(m_dplanf, realIn, 0); + } else { + v_copy(m_dbuf, realIn, m_size); + SleefDFT_double_execute(m_dplanf, 0, 0); + } + v_cartesian_interleaved_to_magnitudes(magOut, m_dpacked, m_size/2+1); + } + + void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) { + if (!m_fplanf) initFloat(); + if (isAligned(realIn)) { + SleefDFT_float_execute(m_fplanf, realIn, 0); + } else { + v_copy(m_fbuf, realIn, m_size); + SleefDFT_float_execute(m_fplanf, 0, 0); + } + unpackFloat(realOut, imagOut); + } + + void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) { + if (!m_fplanf) initFloat(); + if (isAligned(realIn) && isAligned(complexOut)) { + SleefDFT_float_execute(m_fplanf, realIn, complexOut); + } else { + v_copy(m_fbuf, realIn, m_size); + SleefDFT_float_execute(m_fplanf, 0, 0); + v_copy(complexOut, m_fpacked, m_size + 2); + } + } + + void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) { + if (!m_fplanf) initFloat(); + if (isAligned(realIn)) { + SleefDFT_float_execute(m_fplanf, realIn, 0); + } else { + v_copy(m_fbuf, realIn, m_size); + SleefDFT_float_execute(m_fplanf, 0, 0); + } + v_cartesian_interleaved_to_polar(magOut, phaseOut, m_fpacked, m_size/2+1); + } + + void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) { + if (!m_fplanf) initFloat(); + if (isAligned(realIn)) { + SleefDFT_float_execute(m_fplanf, realIn, 0); + } else { + v_copy(m_fbuf, realIn, m_size); + SleefDFT_float_execute(m_fplanf, 0, 0); + } + v_cartesian_interleaved_to_magnitudes(magOut, m_fpacked, m_size/2+1); + } + + void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) { + if (!m_dplanf) initDouble(); + packDouble(realIn, imagIn); + if (isAligned(realOut)) { + SleefDFT_double_execute(m_dplani, 0, realOut); + } else { + SleefDFT_double_execute(m_dplani, 0, 0); + v_copy(realOut, m_dbuf, m_size); + } + } + + void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) { + if (!m_dplanf) initDouble(); + if (isAligned(complexIn) && isAligned(realOut)) { + SleefDFT_double_execute(m_dplani, complexIn, realOut); + } else { + v_copy(m_dpacked, complexIn, m_size + 2); + SleefDFT_double_execute(m_dplani, 0, 0); + v_copy(realOut, m_dbuf, m_size); + } + } + + void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) { + if (!m_dplanf) initDouble(); + v_polar_to_cartesian_interleaved(m_dpacked, magIn, phaseIn, m_size/2+1); + if (isAligned(realOut)) { + SleefDFT_double_execute(m_dplani, 0, realOut); + } else { + SleefDFT_double_execute(m_dplani, 0, 0); + v_copy(realOut, m_dbuf, m_size); + } + } + + void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) { + if (!m_dplanf) initDouble(); + const int hs = m_size/2; + for (int i = 0; i <= hs; ++i) { + m_dpacked[i*2] = log(magIn[i] + 0.000001); + m_dpacked[i*2+1] = 0.0; + } + if (isAligned(cepOut)) { + SleefDFT_double_execute(m_dplani, 0, cepOut); + } else { + SleefDFT_double_execute(m_dplani, 0, 0); + v_copy(cepOut, m_dbuf, m_size); + } + } + + void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) { + if (!m_fplanf) initFloat(); + packFloat(realIn, imagIn); + if (isAligned(realOut)) { + SleefDFT_float_execute(m_dplani, 0, realOut); + } else { + SleefDFT_float_execute(m_fplani, 0, 0); + v_copy(realOut, m_fbuf, m_size); + } + } + + void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) { + if (!m_fplanf) initFloat(); + if (isAligned(complexIn) && isAligned(realOut)) { + SleefDFT_float_execute(m_fplani, complexIn, realOut); + } else { + v_copy(m_fpacked, complexIn, m_size + 2); + SleefDFT_float_execute(m_fplani, 0, 0); + v_copy(realOut, m_fbuf, m_size); + } + } + + void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) { + if (!m_fplanf) initFloat(); + v_polar_to_cartesian_interleaved(m_fpacked, magIn, phaseIn, m_size/2+1); + if (isAligned(realOut)) { + SleefDFT_float_execute(m_fplani, 0, realOut); + } else { + SleefDFT_float_execute(m_fplani, 0, 0); + v_copy(realOut, m_fbuf, m_size); + } + } + + void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) { + if (!m_fplanf) initFloat(); + const int hs = m_size/2; + for (int i = 0; i <= hs; ++i) { + m_fpacked[i*2] = logf(magIn[i] + 0.000001f); + m_fpacked[i*2+1] = 0.0; + } + if (isAligned(cepOut)) { + SleefDFT_float_execute(m_fplani, 0, cepOut); + } else { + SleefDFT_float_execute(m_fplani, 0, 0); + v_copy(cepOut, m_fbuf, m_size); + } + } + +private: + SleefDFT *m_fplanf; + SleefDFT *m_fplani; + + float *m_fbuf; + float *m_fpacked; + + SleefDFT *m_dplanf; + SleefDFT *m_dplani; + + double *m_dbuf; + double *m_dpacked; + + const int m_size; +}; + +#endif /* HAVE_SLEEF */ + #ifdef HAVE_KISSFFT class D_KISSFFT : public FFTImpl @@ -2266,6 +2571,9 @@ getImplementationDetails() #ifdef HAVE_FFTW3 impls["fftw"] = SizeConstraintNone; #endif +#ifdef HAVE_SLEEF + impls["sleef"] = SizeConstraintEvenPowerOfTwo; +#endif #ifdef HAVE_KISSFFT impls["kissfft"] = SizeConstraintEven; #endif @@ -2310,7 +2618,7 @@ pickImplementation(int size) } std::string preference[] = { - "ipp", "vdsp", "fftw", "builtin", "kissfft" + "ipp", "vdsp", "sleef", "fftw", "builtin", "kissfft" }; for (int i = 0; i < int(sizeof(preference)/sizeof(preference[0])); ++i) { @@ -2390,6 +2698,10 @@ FFT::FFT(int size, int debugLevel) : } else if (impl == "fftw") { #ifdef HAVE_FFTW3 d = new FFTs::D_FFTW(size); +#endif + } else if (impl == "sleef") { +#ifdef HAVE_SLEEF + d = new FFTs::D_SLEEF(size); #endif } else if (impl == "kissfft") { #ifdef HAVE_KISSFFT @@ -2650,6 +2962,14 @@ FFT::tune() candidates["fftw"] = d; #endif +#ifdef HAVE_SLEEF + os << "Constructing new SLEEF FFT object for size " << size << "..." << std::endl; + d = new FFTs::D_SLEEF(size); + d->initFloat(); + d->initDouble(); + candidates["sleef"] = d; +#endif + #ifdef HAVE_KISSFFT os << "Constructing new KISSFFT object for size " << size << "..." << std::endl; d = new FFTs::D_KISSFFT(size);