diff --git a/.appveyor.yml b/.appveyor.yml
index fe8e8e5..d8558dd 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -10,7 +10,6 @@ platform:
 
 install:
   - cinst wget
-  - cinst meson
   - cinst libsndfile
 
 build_script:
diff --git a/README.md b/README.md
index d18a9d4..a993016 100644
--- a/README.md
+++ b/README.md
@@ -87,8 +87,8 @@ Rubber Band consists of:
  * The Rubber Band Library code.  This is the code that will normally
    be used by your applications.  The headers for this are in the
    rubberband/ directory, and the source code is in src/.
-   The Rubber Band Library depends upon resampler and FFT code; see
-   section 3a below for details.
+   The Rubber Band Library may also depend upon external resampler
+   and FFT code; see section 3a below for details.
 
  * The Rubber Band command-line tool.  This is in main/main.cpp.
    This program uses the Rubber Band Library and also requires libsndfile
@@ -194,9 +194,9 @@ standard. It is unlikely to make any difference (performance or
 otherwise) which C++ standard your compiler uses - as long as it's no
 older than C++98!
 
-If you are building this software using one of the bundled library
-options (Speex or KissFFT), please be sure to review the terms for
-those libraries in `src/speex/COPYING` and `src/kissfft/COPYING` as
+If you are building this software using either of the Speex or KissFFT
+library options, please be sure to review the terms for those
+libraries in `src/speex/COPYING` and `src/kissfft/COPYING` as
 applicable.
 
 
@@ -369,13 +369,26 @@ options (Speex or KissFFT), please be sure to review the terms for
 those libraries in `src/speex/COPYING` and `src/kissfft/COPYING` as
 applicable.
 
+If you are proposing to package Rubber Band for a Linux distribution
+using other packaged libraries, please select FFTW and libsamplerate.
+
 #### FFT libraries supported
 
 ```
 Library     Build option    CPP define     Notes
 ----        ------------    ----------     -----
 
-KissFFT     -Dfft=kissfft   -DUSE_KISSFFT  Default except on macOS/iOS.
+Built-in    -Dfft=builtin   -DUSE_BUILTIN_FFT
+                                           Default except on macOS/iOS.
+                                           Can be distributed with either
+                                           the Rubber Band GPL or
+                                           commercial licence.
+
+KissFFT     -Dfft=kissfft   -DHAVE_KISSFFT
+                                           Single precision.
+                                           Only indicated for use with
+                                           single-precision sample type
+                                           (see below).
                                            Bundled, can be distributed with
                                            either the Rubber Band GPL or
                                            commercial licence.
@@ -432,8 +445,9 @@ build files will handle these for you.)
 
     -DPROCESS_SAMPLE_TYPE=float
     Select single precision for internal calculations. The default is
-    double precision. Consider using for mobile architectures with
-    slower double-precision support.
+    double precision. Consider in conjunction with single-precision
+    KissFFT for mobile architectures with slower double-precision
+    support.
 
     -DUSE_POMMIER_MATHFUN
     Select the Julien Pommier implementations of trig functions for ARM
diff --git a/dotnet/rubberband-library.vcxproj b/dotnet/rubberband-library.vcxproj
index 93125a2..8fbddaf 100644
--- a/dotnet/rubberband-library.vcxproj
+++ b/dotnet/rubberband-library.vcxproj
@@ -77,7 +77,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>..;..\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>__MSVC__;WIN32;_DEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_KISSFFT;USE_SPEEX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>__MSVC__;WIN32;_DEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_BUILTIN_FFT;USE_SPEEX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -91,7 +91,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>..;..\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>__MSVC__;WIN32;_DEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_KISSFFT;USE_SPEEX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>__MSVC__;WIN32;_DEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_BUILTIN_FFT;USE_SPEEX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <PrecompiledHeader>
@@ -109,7 +109,7 @@
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <OmitFramePointers>true</OmitFramePointers>
       <AdditionalIncludeDirectories>..;..\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>__MSVC__;WIN32;NDEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_KISSFFT;NO_TIMING;USE_SPEEX;NO_THREAD_CHECKS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>__MSVC__;WIN32;NDEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_BUILTIN_FFT;NO_TIMING;USE_SPEEX;NO_THREAD_CHECKS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <BufferSecurityCheck>false</BufferSecurityCheck>
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
@@ -127,7 +127,7 @@
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <OmitFramePointers>true</OmitFramePointers>
       <AdditionalIncludeDirectories>..;..\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>__MSVC__;WIN32;NDEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_KISSFFT;NO_TIMING;USE_SPEEX;NO_THREAD_CHECKS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>__MSVC__;WIN32;NDEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_BUILTIN_FFT;NO_TIMING;USE_SPEEX;NO_THREAD_CHECKS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <BufferSecurityCheck>false</BufferSecurityCheck>
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
@@ -178,8 +178,6 @@
     <ClCompile Include="..\src\dsp\AudioCurveCalculator.cpp" />
     <ClCompile Include="..\src\dsp\FFT.cpp" />
     <ClCompile Include="..\src\dsp\Resampler.cpp" />
-    <ClCompile Include="..\src\kissfft\kiss_fft.c" />
-    <ClCompile Include="..\src\kissfft\kiss_fftr.c" />
     <ClCompile Include="..\src\rubberband-c.cpp" />
     <ClCompile Include="..\src\RubberBandStretcher.cpp" />
     <ClCompile Include="..\src\speex\resample.c" />
diff --git a/meson.build b/meson.build
index d32902c..65d49eb 100644
--- a/meson.build
+++ b/meson.build
@@ -2,7 +2,7 @@
 project(
   'Rubber Band Library',
   'c', 'cpp',
-  version: '1.9.1',
+  version: '1.9.2-pre',
   license: 'GPL-2.0-or-later',
   default_options: [
     # All Rubber Band code is actually C++98, but some compilers no
@@ -101,6 +101,9 @@ sndfile_dep = dependency('sndfile', version: '>= 1.0.16', required: false)
 vamp_dep = dependency('vamp-sdk', version: '>= 2.9', required: false)
 thread_dep = dependency('threads')
 have_ladspa = cpp.has_header('ladspa.h', args: extra_include_args)
+have_sincos = cpp.has_function('sincos',
+                               prefix: '#define _GNU_SOURCE\n#include <math.h>',
+                               args: '-lm')
 have_jni = cpp.has_header('jni.h', args: extra_include_args)
 
 javac = find_program('javac', required: false)
@@ -114,6 +117,7 @@ feature_defines = []
 feature_libraries = []
 feature_sources = []
 pkgconfig_requirements = []
+pkgconfig_libraries = []
 arch_flags = []
 
 config_summary = {}
@@ -128,7 +132,7 @@ if fft == 'auto'
   if system == 'darwin'
     fft = 'vdsp'
   else
-    fft = 'kissfft'
+    fft = 'builtin'
   endif
 endif
 
@@ -140,14 +144,23 @@ if resampler == 'auto'
   endif
 endif
 
-if fft == 'kissfft'
+if fft == 'builtin'
+  config_summary += { 'FFT': 'Built-in' }
+  message('For FFT: using built-in implementation')
+  if fftw3_dep.found()
+    message('(to use FFTW instead, reconfigure with -Dfft=fftw)')
+  endif
+  feature_defines += ['-DUSE_BUILTIN_FFT']
+
+elif fft == 'kissfft'
   config_summary += { 'FFT': 'KissFFT' }
   message('For FFT: using KissFFT')
   if fftw3_dep.found()
     message('(to use FFTW instead, reconfigure with -Dfft=fftw)')
   endif
   feature_sources += ['src/kissfft/kiss_fft.c', 'src/kissfft/kiss_fftr.c']
-  feature_defines += ['-DUSE_KISSFFT']
+  feature_defines += ['-DHAVE_KISSFFT']
+  general_include_dirs += 'src/kissfft'
 
 elif fft == 'fftw'
   if fftw3_dep.found()
@@ -169,6 +182,7 @@ elif fft == 'vdsp'
   message('For FFT: using vDSP')
   feature_defines += ['-DHAVE_VDSP']
   feature_libraries += ['-framework', 'Accelerate']
+  pkgconfig_libraries += ['-framework', 'Accelerate']
   
 elif fft == 'ipp'
   if ipp_path != ''
@@ -185,7 +199,13 @@ else
 
 endif # fft
 
-if resampler == 'libsamplerate'
+if resampler == 'builtin'
+  config_summary += { 'Resampler': 'Built-in' }
+  message('For resampler: using built-in implementation')
+  library_sources += 'src/dsp/BQResampler.cpp'
+  feature_defines += ['-DUSE_BQRESAMPLER']
+
+elif resampler == 'libsamplerate'
   if samplerate_dep.found()
     config_summary += { 'Resampler': 'libsamplerate' }
     message('For resampler: using libsamplerate')
@@ -223,6 +243,10 @@ else
 
 endif # resampler
 
+if not have_sincos
+  feature_defines += [ '-DLACK_SINCOS' ]
+endif
+
 if ipp_needed
   feature_defines += [
     '-DHAVE_IPP',
@@ -572,7 +596,7 @@ pkg.generate(
   url: 'https://breakfastquay.com/rubberband/',
   version: meson.project_version(),
   requires: pkgconfig_requirements,
-  libraries: '-L${libdir} -lrubberband',
+  libraries: ['-L${libdir} -lrubberband'] + pkgconfig_libraries,
   extra_cflags: '-I${includedir}',
 )
 
diff --git a/meson_options.txt b/meson_options.txt
index 1703806..8e65fa0 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -1,13 +1,13 @@
 
 option('fft',
        type: 'combo',
-       choices: ['auto', 'kissfft', 'fftw', 'vdsp', 'ipp'],
+       choices: ['auto', 'builtin', 'kissfft', 'fftw', 'vdsp', 'ipp'],
        value: 'auto',
-       description: 'FFT library to use. The default (auto) will use vDSP if available, KissFFT otherwise.')
+       description: 'FFT library to use. The default (auto) will use vDSP if available, the builtin implementation otherwise.')
 
 option('resampler',
        type: 'combo',
-       choices: ['auto', 'libsamplerate', 'speex', 'ipp'],
+       choices: ['auto', 'builtin', 'libsamplerate', 'speex', 'ipp'],
        value: 'auto',
        description: 'Resampler library to use. Recommended is libsamplerate. The default (auto) will use libsamplerate if available, speex otherwise.')
 
diff --git a/otherbuilds/Makefile.linux b/otherbuilds/Makefile.linux
index 324b038..b6d4118 100644
--- a/otherbuilds/Makefile.linux
+++ b/otherbuilds/Makefile.linux
@@ -6,7 +6,7 @@ OPTFLAGS	:= -DNDEBUG -ffast-math -O3 -ftree-vectorize
 
 ARCHFLAGS	:= 
 
-CXXFLAGS	:= -std=c++98 $(ARCHFLAGS) $(OPTFLAGS) -I. -Isrc -Irubberband -DHAVE_LIBSAMPLERATE -DUSE_KISSFFT -DNO_THREAD_CHECKS -DUSE_PTHREADS -DNO_TIMING -DHAVE_POSIX_MEMALIGN -DNDEBUG
+CXXFLAGS	:= -std=c++98 $(ARCHFLAGS) $(OPTFLAGS) -I. -Isrc -Irubberband -DHAVE_LIBSAMPLERATE -DUSE_BUILTIN_FFT -DNO_THREAD_CHECKS -DUSE_PTHREADS -DNO_TIMING -DHAVE_POSIX_MEMALIGN -DNDEBUG
 
 CFLAGS		:= $(ARCHFLAGS) $(OPTFLAGS)
 
@@ -69,10 +69,8 @@ LIBRARY_SOURCES := \
 	src/system/sysutils.cpp \
 	src/system/Thread.cpp \
 	src/StretcherChannelData.cpp \
-	src/StretcherImpl.cpp \
-	src/kissfft/kiss_fft.c \
-	src/kissfft/kiss_fftr.c
-
+	src/StretcherImpl.cpp
+        
 LIBRARY_OBJECTS := $(LIBRARY_SOURCES:.cpp=.o)
 LIBRARY_OBJECTS := $(LIBRARY_OBJECTS:.c=.o)
 
diff --git a/otherbuilds/rubberband-library.vcxproj b/otherbuilds/rubberband-library.vcxproj
index 93125a2..8fbddaf 100644
--- a/otherbuilds/rubberband-library.vcxproj
+++ b/otherbuilds/rubberband-library.vcxproj
@@ -77,7 +77,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>..;..\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>__MSVC__;WIN32;_DEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_KISSFFT;USE_SPEEX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>__MSVC__;WIN32;_DEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_BUILTIN_FFT;USE_SPEEX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <MinimalRebuild>true</MinimalRebuild>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -91,7 +91,7 @@
     <ClCompile>
       <Optimization>Disabled</Optimization>
       <AdditionalIncludeDirectories>..;..\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>__MSVC__;WIN32;_DEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_KISSFFT;USE_SPEEX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>__MSVC__;WIN32;_DEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_BUILTIN_FFT;USE_SPEEX;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
       <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
       <PrecompiledHeader>
@@ -109,7 +109,7 @@
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <OmitFramePointers>true</OmitFramePointers>
       <AdditionalIncludeDirectories>..;..\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>__MSVC__;WIN32;NDEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_KISSFFT;NO_TIMING;USE_SPEEX;NO_THREAD_CHECKS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>__MSVC__;WIN32;NDEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_BUILTIN_FFT;NO_TIMING;USE_SPEEX;NO_THREAD_CHECKS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <BufferSecurityCheck>false</BufferSecurityCheck>
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
@@ -127,7 +127,7 @@
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
       <OmitFramePointers>true</OmitFramePointers>
       <AdditionalIncludeDirectories>..;..\src;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
-      <PreprocessorDefinitions>__MSVC__;WIN32;NDEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_KISSFFT;NO_TIMING;USE_SPEEX;NO_THREAD_CHECKS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <PreprocessorDefinitions>__MSVC__;WIN32;NDEBUG;_LIB;NOMINMAX;_USE_MATH_DEFINES;USE_BUILTIN_FFT;NO_TIMING;USE_SPEEX;NO_THREAD_CHECKS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
       <BufferSecurityCheck>false</BufferSecurityCheck>
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
@@ -178,8 +178,6 @@
     <ClCompile Include="..\src\dsp\AudioCurveCalculator.cpp" />
     <ClCompile Include="..\src\dsp\FFT.cpp" />
     <ClCompile Include="..\src\dsp\Resampler.cpp" />
-    <ClCompile Include="..\src\kissfft\kiss_fft.c" />
-    <ClCompile Include="..\src\kissfft\kiss_fftr.c" />
     <ClCompile Include="..\src\rubberband-c.cpp" />
     <ClCompile Include="..\src\RubberBandStretcher.cpp" />
     <ClCompile Include="..\src\speex\resample.c" />
diff --git a/src/RubberBandStretcher.cpp b/src/RubberBandStretcher.cpp
index 19d6ea1..e9851a7 100644
--- a/src/RubberBandStretcher.cpp
+++ b/src/RubberBandStretcher.cpp
@@ -23,7 +23,6 @@
 
 #include "StretcherImpl.h"
 
-using namespace std;
 
 namespace RubberBand {
 
@@ -122,7 +121,7 @@ RubberBandStretcher::setMaxProcessSize(size_t samples)
 }
 
 void
-RubberBandStretcher::setKeyFrameMap(const map<size_t, size_t> &mapping)
+RubberBandStretcher::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
 {
     m_d->setKeyFrameMap(mapping);
 }
@@ -177,19 +176,19 @@ RubberBandStretcher::getInputIncrement() const
     return m_d->getInputIncrement();
 }
 
-vector<int>
+std::vector<int>
 RubberBandStretcher::getOutputIncrements() const
 {
     return m_d->getOutputIncrements();
 }
 
-vector<float>
+std::vector<float>
 RubberBandStretcher::getPhaseResetCurve() const
 {
     return m_d->getPhaseResetCurve();
 }
 
-vector<int>
+std::vector<int>
 RubberBandStretcher::getExactTimePoints() const
 {
     return m_d->getExactTimePoints();
diff --git a/src/StretcherImpl.cpp b/src/StretcherImpl.cpp
index de7f084..19813a6 100644
--- a/src/StretcherImpl.cpp
+++ b/src/StretcherImpl.cpp
@@ -674,6 +674,8 @@ RubberBandStretcher::Impl::configure()
 
             Resampler::Parameters params;
             params.quality = Resampler::FastestTolerable;
+            params.dynamism = Resampler::RatioOftenChanging;
+            params.ratioChange = Resampler::SmoothRatioChange;
             params.maxBufferSize = 4096 * 16;
             params.debugLevel = m_debugLevel;
             
@@ -818,6 +820,8 @@ RubberBandStretcher::Impl::reconfigure()
 
             Resampler::Parameters params;
             params.quality = Resampler::FastestTolerable;
+            params.dynamism = Resampler::RatioOftenChanging;
+            params.ratioChange = Resampler::SmoothRatioChange;
             params.maxBufferSize = m_sWindowSize;
             params.debugLevel = m_debugLevel;
             
diff --git a/src/dsp/BQResampler.cpp b/src/dsp/BQResampler.cpp
new file mode 100644
index 0000000..2a628f8
--- /dev/null
+++ b/src/dsp/BQResampler.cpp
@@ -0,0 +1,633 @@
+//* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Rubber Band Library
+    An audio time-stretching and pitch-shifting library.
+    Copyright 2007-2021 Particular Programs Ltd.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
+*/
+
+#include "BQResampler.h"
+
+#include <cmath>
+
+#include <iostream>
+
+#include "system/Allocators.h"
+#include "system/VectorOps.h"
+
+#define BQ_R__ R__
+
+using std::vector;
+using std::cerr;
+using std::endl;
+using std::min;
+using std::max;
+
+namespace RubberBand {
+
+BQResampler::BQResampler(Parameters parameters, int channels) :
+    m_qparams(parameters.quality),
+    m_dynamism(parameters.dynamism),
+    m_ratio_change(parameters.ratioChange),
+    m_debug_level(parameters.debugLevel),
+    m_initial_rate(parameters.referenceSampleRate),
+    m_channels(channels),
+    m_fade_count(0),
+    m_initialised(false)
+{
+    if (m_debug_level > 0) {
+        cerr << "BQResampler::BQResampler: "
+             << (m_dynamism == RatioOftenChanging ? "often-changing" : "mostly-fixed")
+             << ", "
+             << (m_ratio_change == SmoothRatioChange ? "smooth" : "sudden")
+             << " ratio changes, ref " << m_initial_rate << " Hz" << endl;
+    }
+    
+    if (m_dynamism == RatioOftenChanging) {
+        m_proto_length = m_qparams.proto_p * m_qparams.p_multiple + 1;
+        if (m_debug_level > 0) {
+            cerr << "BQResampler: creating prototype filter of length "
+                 << m_proto_length << endl;
+        }
+        m_prototype = make_filter(m_proto_length, m_qparams.proto_p);
+        m_prototype.push_back(0.0); // interpolate without fear
+    }
+
+    int phase_reserve = 2 * int(round(m_initial_rate));
+    int buffer_reserve = 1000 * m_channels;
+    m_state_a.phase_info.reserve(phase_reserve);
+    m_state_a.buffer.reserve(buffer_reserve);
+
+    if (m_dynamism == RatioOftenChanging) {
+        m_state_b.phase_info.reserve(phase_reserve);
+        m_state_b.buffer.reserve(buffer_reserve);
+    }
+
+    m_s = &m_state_a;
+    m_fade = &m_state_b;
+}
+
+BQResampler::BQResampler(const BQResampler &other) :
+    m_qparams(other.m_qparams),
+    m_dynamism(other.m_dynamism),
+    m_ratio_change(other.m_ratio_change),
+    m_debug_level(other.m_debug_level),
+    m_initial_rate(other.m_initial_rate),
+    m_channels(other.m_channels),
+    m_state_a(other.m_state_a),
+    m_state_b(other.m_state_b),
+    m_fade_count(other.m_fade_count),
+    m_prototype(other.m_prototype),
+    m_proto_length(other.m_proto_length),
+    m_initialised(other.m_initialised)
+{
+    if (other.m_s == &(other.m_state_a)) {
+        m_s = &m_state_a;
+        m_fade = &m_state_b;
+    } else {
+        m_s = &m_state_b;
+        m_fade = &m_state_a;
+    }
+}
+
+void
+BQResampler::reset()
+{
+    m_initialised = false;
+    m_fade_count = 0;
+}
+
+BQResampler::QualityParams::QualityParams(Quality q)
+{
+    switch (q) {
+    case Fastest:
+        p_multiple = 12;
+        proto_p = 160;
+        k_snr = 70.0;
+        k_transition = 0.2;
+        cut = 0.9;
+        break;
+    case FastestTolerable:
+        p_multiple = 62;
+        proto_p = 160;
+        k_snr = 90.0;
+        k_transition = 0.05;
+        cut = 0.975;
+        break;
+    case Best:
+        p_multiple = 122;
+        proto_p = 800;
+        k_snr = 100.0;
+        k_transition = 0.01;
+        cut = 0.995;
+        break;
+    }
+}
+    
+int
+BQResampler::resampleInterleaved(float *const out,
+                                 int outspace,
+                                 const float *const in,
+                                 int incount,
+                                 double ratio,
+                                 bool final) {
+
+    int fade_length = round(m_initial_rate / 1000.0);
+    if (fade_length < 6) {
+        fade_length = 6;
+    }
+    int max_fade = min(outspace, int(floor(incount * ratio))) / 2;
+    if (fade_length > max_fade) {
+        fade_length = max_fade;
+    }
+        
+    if (!m_initialised) {
+        state_for_ratio(*m_s, ratio, *m_fade);
+        m_initialised = true;
+    } else if (ratio != m_s->parameters.ratio) {
+        state *tmp = m_fade;
+        m_fade = m_s;
+        m_s = tmp;
+        state_for_ratio(*m_s, ratio, *m_fade);
+        if (m_ratio_change == SmoothRatioChange) {
+            if (m_debug_level > 0) {
+                cerr << "BQResampler: ratio changed, beginning fade of length "
+                     << fade_length << endl;
+            }
+            m_fade_count = fade_length;
+        }
+    }
+
+    int i = 0, o = 0;
+    int bufsize = m_s->buffer.size();
+
+    int incount_samples = incount * m_channels;
+    int outspace_samples = outspace * m_channels;
+    
+    while (o < outspace_samples) {
+        while (i < incount_samples && m_s->fill < bufsize) {
+            m_s->buffer[m_s->fill++] = in[i++];
+        }
+        if (m_s->fill == bufsize) {
+            out[o++] = reconstruct_one(m_s);
+        } else if (final && m_s->fill > m_s->centre) {
+            out[o++] = reconstruct_one(m_s);
+        } else if (final && m_s->fill == m_s->centre &&
+                   m_s->current_phase != m_s->initial_phase) {
+            out[o++] = reconstruct_one(m_s);
+        } else {
+            break;
+        }
+    }
+
+    int fbufsize = m_fade->buffer.size();
+    int fi = 0, fo = 0;
+    while (fo < o && m_fade_count > 0) {
+        while (fi < incount_samples && m_fade->fill < fbufsize) {
+            m_fade->buffer[m_fade->fill++] = in[fi++];
+        }
+        if (m_fade->fill == fbufsize) {
+            double r = reconstruct_one(m_fade);
+            double fadeWith = out[fo];
+            double extent = double(m_fade_count - 1) / double(fade_length);
+            double mixture = 0.5 * (1.0 - cos(M_PI * extent));
+            double mixed = r * mixture + fadeWith * (1.0 - mixture);
+            out[fo] = mixed;
+            ++fo;
+            if (m_fade->current_channel == 0) {
+                --m_fade_count;
+            }
+        } else {
+            break;
+        }
+    }
+        
+    return o / m_channels;
+}
+
+int
+BQResampler::gcd(int a, int b) const
+{
+    int c = a % b;
+    if (c == 0) return b;
+    else return gcd(b, c);
+}
+
+double
+BQResampler::bessel0(double x) const
+{
+    static double facsquared[] = {
+        0.0, 1.0, 4.0, 36.0,
+        576.0, 14400.0, 518400.0, 25401600.0,
+        1625702400.0, 131681894400.0, 1.316818944E13, 1.59335092224E15,
+        2.29442532803E17, 3.87757880436E19, 7.60005445655E21,
+        1.71001225272E24, 4.37763136697E26, 1.26513546506E29,
+        4.09903890678E31, 1.47975304535E34
+    };
+    static int nterms = sizeof(facsquared) / sizeof(facsquared[0]);
+    double b = 1.0;
+    for (int n = 1; n < nterms; ++n) {
+        double ff = facsquared[n];
+        double term = pow(x / 2.0, n * 2.0) / ff;
+        b += term;
+    }
+    return b;
+}
+
+vector<double>
+BQResampler::kaiser(double beta, int len) const
+{
+    double denominator = bessel0(beta);
+    int half = (len % 2 == 0 ? len/2 : (len+1)/2);
+    vector<double> v(len, 0.0);
+    for (int n = 0; n < half; ++n) {
+        double k = (2.0 * n) / (len-1) - 1.0;
+        v[n] = bessel0 (beta * sqrt(1.0 - k*k)) / denominator;
+    }
+    for (int n = half; n < len; ++n) {
+        v[n] = v[len-1 - n];
+    }
+    return v;
+}
+
+void
+BQResampler::kaiser_params(double attenuation,
+                           double transition,
+                           double &beta,
+                           int &len) const
+{
+    if (attenuation > 21.0) {
+        len = 1 + ceil((attenuation - 7.95) / (2.285 * transition));
+    } else {
+        len = 1 + ceil(5.79 / transition);
+    }
+    beta = 0.0;
+    if (attenuation > 50.0) {
+        beta = 0.1102 * (attenuation - 8.7);
+    } else if (attenuation > 21.0) {
+        beta = 0.5842 * (pow (attenuation - 21.0, 0.4)) +
+            0.07886 * (attenuation - 21.0);
+    }
+}
+
+vector<double>
+BQResampler::kaiser_for(double attenuation,
+                        double transition,
+                        int minlen,
+                        int maxlen) const
+{
+    double beta;
+    int m;
+    kaiser_params(attenuation, transition, beta, m);
+    int mb = m;
+    if (maxlen > 0 && mb > maxlen - 1) {
+        mb = maxlen - 1;
+    } else if (minlen > 0 && mb < minlen) {
+        mb = minlen;
+    }
+    if (mb % 2 == 0) ++mb;
+    if (m_debug_level > 0) {
+        cerr << "BQResampler: window attenuation " << attenuation
+             << ", transition " << transition
+             << " -> length " << m << " adjusted to " << mb
+             << ", beta " << beta << endl;
+    }
+    return kaiser(beta, mb);
+}
+    
+void
+BQResampler::sinc_multiply(double peak_to_zero, vector<double> &buf) const
+{
+    int len = int(buf.size());
+    if (len < 2) return;
+
+    int left = len / 2;
+    int right = (len + 1) / 2;
+    double m = M_PI / peak_to_zero;
+
+    for (int i = 1; i <= right; ++i) {
+        double x = i * m;
+        double sinc = sin(x) / x;
+        if (i <= left) {
+            buf[left - i] *= sinc;
+        }
+        if (i < right) {
+            buf[i + left] *= sinc;
+        }
+    }
+}
+
+BQResampler::params
+BQResampler::fill_params(double ratio, int num, int denom) const
+{
+    params p;
+    int g = gcd (num, denom);
+    p.ratio = ratio;
+    p.numerator = num / g;
+    p.denominator = denom / g;
+    p.effective = double(p.numerator) / double(p.denominator);
+    p.peak_to_zero = max(p.denominator, p.numerator);
+    p.peak_to_zero /= m_qparams.cut;
+    p.scale = double(p.numerator) / double(p.peak_to_zero);
+
+    if (m_debug_level > 0) {
+        cerr << "BQResampler: ratio " << p.ratio
+             << " -> fraction " << p.numerator << "/" << p.denominator
+             << " with error " << p.effective - p.ratio
+             << endl;
+        cerr << "BQResampler: peak-to-zero " << p.peak_to_zero
+             << ", scale " << p.scale
+             << endl;
+    }
+    
+    return p;
+}
+    
+BQResampler::params
+BQResampler::pick_params(double ratio) const
+{
+    // Farey algorithm, see
+    // https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/
+    int max_denom = 192000;
+    double a = 0.0, b = 1.0, c = 1.0, d = 0.0;
+    double pa = a, pb = b, pc = c, pd = d;
+    double eps = 1e-9;
+    while (b <= max_denom && d <= max_denom) {
+        double mediant = (a + c) / (b + d);
+        if (fabs(ratio - mediant) < eps) {
+            if (b + d <= max_denom) {
+                return fill_params(ratio, a + c, b + d);
+            } else if (d > b) {
+                return fill_params(ratio, c, d);
+            } else {
+                return fill_params(ratio, a, b);
+            }
+        }
+        if (ratio > mediant) {
+            pa = a; pb = b;
+            a += c; b += d;
+        } else {
+            pc = c; pd = d;
+            c += a; d += b;
+        }
+    }
+    if (fabs(ratio - (pc / pd)) < fabs(ratio - (pa / pb))) {
+        return fill_params(ratio, pc, pd);
+    } else {
+        return fill_params(ratio, pa, pb);
+    }
+}
+
+void
+BQResampler::phase_data_for(vector<BQResampler::phase_rec> &target_phase_data,
+                            floatbuf &target_phase_sorted_filter,
+                            int filter_length,
+                            const vector<double> *filter,
+                            int initial_phase,
+                            int input_spacing,
+                            int output_spacing) const
+{
+    target_phase_data.clear();
+    target_phase_data.reserve(input_spacing);
+        
+    for (int p = 0; p < input_spacing; ++p) {
+        int next_phase = p - output_spacing;
+        while (next_phase < 0) next_phase += input_spacing;
+        next_phase %= input_spacing;
+        double dspace = double(input_spacing);
+        int zip_length = ceil(double(filter_length - p) / dspace);
+        int drop = ceil(double(max(0, output_spacing - p)) / dspace);
+        phase_rec phase;
+        phase.next_phase = next_phase;
+        phase.drop = drop;
+        phase.length = zip_length;
+        phase.start_index = 0; // we fill this in below if needed
+        target_phase_data.push_back(phase);
+    }
+
+    if (m_dynamism == RatioMostlyFixed) {
+        if (!filter) throw std::logic_error("filter required at phase_data_for in RatioMostlyFixed mode");
+        target_phase_sorted_filter.clear();
+        target_phase_sorted_filter.reserve(filter_length);
+        for (int p = initial_phase; ; ) {
+            phase_rec &phase = target_phase_data[p];
+            phase.start_index = target_phase_sorted_filter.size();
+            for (int i = 0; i < phase.length; ++i) {
+                target_phase_sorted_filter.push_back
+                    ((*filter)[i * input_spacing + p]);
+            }
+            p = phase.next_phase;
+            if (p == initial_phase) {
+                break;
+            }
+        }
+    }
+}
+
+vector<double>
+BQResampler::make_filter(int filter_length, double peak_to_zero) const
+{
+    vector<double> filter;
+    filter.reserve(filter_length);
+
+    vector<double> kaiser = kaiser_for(m_qparams.k_snr, m_qparams.k_transition,
+                                       1, filter_length);
+    int k_length = kaiser.size();
+
+    if (k_length == filter_length) {
+        sinc_multiply(peak_to_zero, kaiser);
+        return kaiser;
+    } else {
+        kaiser.push_back(0.0);
+        double m = double(k_length - 1) / double(filter_length - 1);
+        for (int i = 0; i < filter_length; ++i) {
+            double ix = i * m;
+            int iix = floor(ix);
+            double remainder = ix - iix;
+            double value = 0.0;
+            value += kaiser[iix] * (1.0 - remainder);
+            value += kaiser[iix+1] * remainder;
+            filter.push_back(value);
+        }
+        sinc_multiply(peak_to_zero, filter);
+        return filter;
+    }
+}
+
+void
+BQResampler::state_for_ratio(BQResampler::state &target_state,
+                             double ratio,
+                             const BQResampler::state &BQ_R__ prev_state) const
+{
+    params parameters = pick_params(ratio);
+    target_state.parameters = parameters;
+    
+    target_state.filter_length =
+        int(parameters.peak_to_zero * m_qparams.p_multiple + 1);
+
+    if (target_state.filter_length % 2 == 0) {
+        ++target_state.filter_length;
+    }
+
+    int half_length = target_state.filter_length / 2; // nb length is odd
+    int input_spacing = parameters.numerator;
+    int initial_phase = half_length % input_spacing;
+
+    target_state.initial_phase = initial_phase;
+    target_state.current_phase = initial_phase;
+
+    if (m_dynamism == RatioMostlyFixed) {
+        
+        if (m_debug_level > 0) {
+            cerr << "BQResampler: creating filter of length "
+                 << target_state.filter_length << endl;
+        }
+
+        vector<double> filter =
+            make_filter(target_state.filter_length, parameters.peak_to_zero);
+
+        phase_data_for(target_state.phase_info,
+                       target_state.phase_sorted_filter,
+                       target_state.filter_length, &filter,
+                       target_state.initial_phase,
+                       input_spacing,
+                       parameters.denominator);
+    } else {
+        phase_data_for(target_state.phase_info,
+                       target_state.phase_sorted_filter,
+                       target_state.filter_length, 0,
+                       target_state.initial_phase,
+                       input_spacing,
+                       parameters.denominator);
+    }
+
+    int buffer_left = half_length / input_spacing;
+    int buffer_right = buffer_left + 1;
+
+    int buffer_length = buffer_left + buffer_right;
+    buffer_length = max(buffer_length,
+                        int(prev_state.buffer.size() / m_channels));
+
+    target_state.centre = buffer_length / 2;
+    target_state.left = target_state.centre - buffer_left;
+    target_state.fill = target_state.centre;
+
+    buffer_length *= m_channels;
+    target_state.centre *= m_channels;
+    target_state.left *= m_channels;
+    target_state.fill *= m_channels;
+    
+    int n_phases = int(target_state.phase_info.size());
+
+    if (m_debug_level > 0) {
+        cerr << "BQResampler: " << m_channels << " channel(s) interleaved"
+             << ", buffer left " << buffer_left
+             << ", right " << buffer_right
+             << ", total " << buffer_length << endl;
+    
+        cerr << "BQResampler: input spacing " << input_spacing
+             << ", output spacing " << parameters.denominator
+             << ", initial phase " << initial_phase
+             << " of " << n_phases << endl;
+    }
+
+    if (prev_state.buffer.size() > 0) {
+        if (int(prev_state.buffer.size()) == buffer_length) {
+            target_state.buffer = prev_state.buffer;
+            target_state.fill = prev_state.fill;
+        } else {
+            target_state.buffer = floatbuf(buffer_length, 0.0);
+            for (int i = 0; i < prev_state.fill; ++i) {
+                int offset = i - prev_state.centre;
+                int new_ix = offset + target_state.centre;
+                if (new_ix >= 0 && new_ix < buffer_length) {
+                    target_state.buffer[new_ix] = prev_state.buffer[i];
+                    target_state.fill = new_ix + 1;
+                }
+            }
+        }
+
+        int phases_then = int(prev_state.phase_info.size());
+        double distance_through =
+            double(prev_state.current_phase) / double(phases_then);
+        target_state.current_phase = round(n_phases * distance_through);
+        if (target_state.current_phase >= n_phases) {
+            target_state.current_phase = n_phases - 1;
+        }
+    } else {
+        target_state.buffer = floatbuf(buffer_length, 0.0);
+    }
+}
+
+double
+BQResampler::reconstruct_one(state *s) const
+{
+    const phase_rec &pr = s->phase_info[s->current_phase];
+    int phase_length = pr.length;
+    double result = 0.0;
+
+    if (m_dynamism == RatioMostlyFixed) {
+        int phase_start = pr.start_index;
+        if (m_channels == 1) {
+            result = v_multiply_and_sum
+                (s->phase_sorted_filter.data() + phase_start,
+                 s->buffer.data() + s->left,
+                 phase_length);
+        } else {
+            for (int i = 0; i < phase_length; ++i) {
+                result +=
+                    s->phase_sorted_filter[phase_start + i] *
+                    s->buffer[s->left + i * m_channels + s->current_channel];
+            }
+        }
+    } else {
+        double m = double(m_proto_length - 1) / double(s->filter_length - 1);
+        for (int i = 0; i < phase_length; ++i) {
+            double sample =
+                s->buffer[s->left + i * m_channels + s->current_channel];
+            int filter_index = i * s->parameters.numerator + s->current_phase;
+            double proto_index = m * filter_index;
+            int iix = floor(proto_index);
+            double remainder = proto_index - iix;
+            double filter_value = m_prototype[iix] * (1.0 - remainder);
+            filter_value += m_prototype[iix+1] * remainder;
+            result += filter_value * sample;
+        }
+    }
+
+    s->current_channel = (s->current_channel + 1) % m_channels;
+    
+    if (s->current_channel == 0) {
+
+        if (pr.drop > 0) {
+            int drop = pr.drop * m_channels;
+            v_move(s->buffer.data(), s->buffer.data() + drop,
+                   int(s->buffer.size()) - drop);
+            for (int i = 1; i <= drop; ++i) {
+                s->buffer[s->buffer.size() - i] = 0.0;
+            }
+            s->fill -= drop;
+        }
+
+        s->current_phase = pr.next_phase;
+    }
+    
+    return result * s->parameters.scale;
+}
+
+}
diff --git a/src/dsp/BQResampler.h b/src/dsp/BQResampler.h
new file mode 100644
index 0000000..c05af1d
--- /dev/null
+++ b/src/dsp/BQResampler.h
@@ -0,0 +1,165 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+/*
+    Rubber Band Library
+    An audio time-stretching and pitch-shifting library.
+    Copyright 2007-2021 Particular Programs Ltd.
+
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+    published by the Free Software Foundation; either version 2 of the
+    License, or (at your option) any later version.  See the file
+    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
+*/
+
+#ifndef BQ_BQRESAMPLER_H
+#define BQ_BQRESAMPLER_H
+
+#include <vector>
+
+#include "system/Allocators.h"
+#include "system/VectorOps.h"
+
+namespace RubberBand {
+
+class BQResampler
+{
+public:
+    enum Quality { Best, FastestTolerable, Fastest };
+    enum Dynamism { RatioOftenChanging, RatioMostlyFixed };
+    enum RatioChange { SmoothRatioChange, SuddenRatioChange };
+    
+    struct Parameters {
+        Quality quality;
+        Dynamism dynamism; 
+        RatioChange ratioChange;
+        double referenceSampleRate;
+        int debugLevel;
+
+        Parameters() :
+            quality(FastestTolerable),
+            dynamism(RatioMostlyFixed),
+            ratioChange(SmoothRatioChange),
+            referenceSampleRate(44100),
+            debugLevel(0) { }
+    };
+
+    BQResampler(Parameters parameters, int channels);
+    BQResampler(const BQResampler &);
+
+    int resampleInterleaved(float *const out, int outspace,
+                            const float *const in, int incount,
+                            double ratio, bool final);
+
+    void reset();
+
+private:
+    struct QualityParams {
+        int p_multiple;
+        int proto_p;
+        double k_snr;
+        double k_transition;
+        double cut;
+        QualityParams(Quality);
+    };
+
+    const QualityParams m_qparams;
+    const Dynamism m_dynamism;
+    const RatioChange m_ratio_change;
+    const int m_debug_level;
+    const double m_initial_rate;
+    const int m_channels;
+
+    struct params {
+        double ratio;
+        int numerator;
+        int denominator;
+        double effective;
+        double peak_to_zero;
+        double scale;
+        params() : ratio(1.0), numerator(1), denominator(1),
+                   effective(1.0), peak_to_zero(0), scale(1.0) { }
+    };
+
+    struct phase_rec {
+        int next_phase;
+        int length;
+        int start_index;
+        int drop;
+        phase_rec() : next_phase(0), length(0), start_index(0), drop(0) { }
+    };
+
+    typedef std::vector<float, RubberBand::StlAllocator<float> > floatbuf;
+    
+    struct state {
+        params parameters;
+        int initial_phase;
+        int current_phase;
+        int current_channel;
+        int filter_length;
+        std::vector<phase_rec> phase_info;
+        floatbuf phase_sorted_filter;
+        floatbuf buffer;
+        int left;
+        int centre;
+        int fill;
+        state() : initial_phase(0), current_phase(0), current_channel(0),
+                  filter_length(0), left(0), centre(0), fill(0) { }
+    };
+
+    state m_state_a;
+    state m_state_b;
+
+    state *m_s;        // points at either m_state_a or m_state_b
+    state *m_fade;     // whichever one m_s does not point to
+    
+    int m_fade_count;
+    
+    std::vector<double> m_prototype;
+    int m_proto_length;
+    bool m_initialised;
+
+    int gcd(int a, int b) const;
+    double bessel0(double x) const;
+    std::vector<double> kaiser(double beta, int len) const;
+    void kaiser_params(double attenuation, double transition,
+                       double &beta, int &len) const;
+    std::vector<double> kaiser_for(double attenuation, double transition,
+                                   int minlen, int maxlen) const;
+    void sinc_multiply(double peak_to_zero, std::vector<double> &buf) const;
+
+    params fill_params(double ratio, int num, int denom) const;
+    params pick_params(double ratio) const;
+
+    std::vector<double> make_filter(int filter_length,
+                                    double peak_to_zero) const;
+    
+    void phase_data_for(std::vector<phase_rec> &target_phase_data,
+                        floatbuf &target_phase_sorted_filter,
+                        int filter_length,
+                        const std::vector<double> *filter,
+                        int initial_phase,
+                        int input_spacing,
+                        int output_spacing) const;
+    
+    void state_for_ratio(state &target_state,
+                         double ratio,
+                         const state &R__ prev_state) const;
+    
+    double reconstruct_one(state *s) const;
+
+    BQResampler &operator=(const BQResampler &); // not provided
+};
+
+}
+
+#endif
diff --git a/src/dsp/FFT.cpp b/src/dsp/FFT.cpp
index 41c80db..ca2fec0 100644
--- a/src/dsp/FFT.cpp
+++ b/src/dsp/FFT.cpp
@@ -28,9 +28,19 @@
 #include "system/VectorOps.h"
 #include "system/VectorOpsComplex.h"
 
+// Define USE_FFTW_WISDOM if you are defining HAVE_FFTW3 and you want
+// to use FFTW_MEASURE mode with persistent wisdom files. This will
+// make things much slower on first use if no suitable wisdom has been
+// saved, but may be faster during subsequent use.
+//#define USE_FFTW_WISDOM 1
+
+// Define FFT_MEASUREMENT to include timing measurement code callable
+// via the static method FFT::tune(). Must be defined when the header
+// is included as well.
 //#define FFT_MEASUREMENT 1
 
 #ifdef FFT_MEASUREMENT
+#define FFT_MEASUREMENT_RETURN_RESULT_TEXT 1
 #include <sstream>
 #endif
 
@@ -47,41 +57,21 @@
 #include <Accelerate/Accelerate.h>
 #endif
 
-#ifdef HAVE_MEDIALIB
-#include <mlib_signal.h>
-#endif
-
-#ifdef HAVE_OPENMAX
-#include <omxSP.h>
-#endif
-
-#ifdef HAVE_SFFT
-extern "C" {
-#include <sfft.h>
-}
-#endif
-
-#ifdef USE_KISSFFT
-#include "kissfft/kiss_fftr.h"
+#ifdef HAVE_KISSFFT
+#include "kiss_fftr.h"
 #endif
 
 #ifndef HAVE_IPP
 #ifndef HAVE_FFTW3
-#ifndef USE_KISSFFT
+#ifndef HAVE_KISSFFT
 #ifndef USE_BUILTIN_FFT
 #ifndef HAVE_VDSP
-#ifndef HAVE_MEDIALIB
-#ifndef HAVE_OPENMAX
-#ifndef HAVE_SFFT
 #error No FFT implementation selected!
 #endif
 #endif
 #endif
 #endif
 #endif
-#endif
-#endif
-#endif
 
 #include <cmath>
 #include <iostream>
@@ -96,6 +86,8 @@ extern "C" {
 #endif
 #endif
 
+#define BQ_R__ R__
+
 namespace RubberBand {
 
 class FFTImpl
@@ -105,28 +97,30 @@ public:
 
     virtual FFT::Precisions getSupportedPrecisions() const = 0;
 
+    virtual int getSize() const = 0;
+    
     virtual void initFloat() = 0;
     virtual void initDouble() = 0;
 
-    virtual void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) = 0;
-    virtual void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) = 0;
-    virtual void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) = 0;
-    virtual void forwardMagnitude(const double *R__ realIn, double *R__ magOut) = 0;
+    virtual void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) = 0;
+    virtual void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) = 0;
+    virtual void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) = 0;
+    virtual void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) = 0;
 
-    virtual void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) = 0;
-    virtual void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) = 0;
-    virtual void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) = 0;
-    virtual void forwardMagnitude(const float *R__ realIn, float *R__ magOut) = 0;
+    virtual void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) = 0;
+    virtual void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) = 0;
+    virtual void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) = 0;
+    virtual void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) = 0;
 
-    virtual void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) = 0;
-    virtual void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) = 0;
-    virtual void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) = 0;
-    virtual void inverseCepstral(const double *R__ magIn, double *R__ cepOut) = 0;
+    virtual void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) = 0;
+    virtual void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) = 0;
+    virtual void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) = 0;
+    virtual void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) = 0;
 
-    virtual void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) = 0;
-    virtual void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) = 0;
-    virtual void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) = 0;
-    virtual void inverseCepstral(const float *R__ magIn, float *R__ cepOut) = 0;
+    virtual void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) = 0;
+    virtual void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) = 0;
+    virtual void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) = 0;
+    virtual void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) = 0;
 };    
 
 namespace FFTs {
@@ -170,6 +164,10 @@ public:
         }
     }
 
+    int getSize() const {
+        return m_size;
+    }
+    
     FFT::Precisions
     getSupportedPrecisions() const {
         return FFT::SinglePrecision | FFT::DoublePrecision;
@@ -231,8 +229,7 @@ public:
 #endif
     }
 
-    void packFloat(const float *R__ re, const float *R__ im) {
-        Profiler profiler("D_IPP::packFloat");
+    void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) {
         int index = 0;
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
@@ -253,8 +250,7 @@ public:
         }
     }
 
-    void packDouble(const double *R__ re, const double *R__ im) {
-        Profiler profiler("D_IPP::packDouble");
+    void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) {
         int index = 0;
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
@@ -275,8 +271,7 @@ public:
         }
     }
 
-    void unpackFloat(float *re, float *R__ im) { // re may be equal to m_fpacked
-        Profiler profiler("D_IPP::unpackFloat");
+    void unpackFloat(float *re, float *BQ_R__ im) { // re may be equal to m_fpacked
         int index = 0;
         const int hs = m_size/2;
         if (im) {
@@ -292,8 +287,7 @@ public:
         }
     }        
 
-    void unpackDouble(double *re, double *R__ im) { // re may be equal to m_dpacked
-        Profiler profiler("D_IPP::unpackDouble");
+    void unpackDouble(double *re, double *BQ_R__ im) { // re may be equal to m_dpacked
         int index = 0;
         const int hs = m_size/2;
         if (im) {
@@ -309,90 +303,75 @@ public:
         }
     }        
 
-    void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) {
-        Profiler profiler("D_IPP::forward [d]");
+    void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) {
         if (!m_dspec) initDouble();
         ippsFFTFwd_RToCCS_64f(realIn, m_dpacked, m_dspec, m_dbuf);
         unpackDouble(realOut, imagOut);
     }
 
-    void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) {
-        Profiler profiler("D_IPP::forwardInterleaved [d]");
+    void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) {
         if (!m_dspec) initDouble();
         ippsFFTFwd_RToCCS_64f(realIn, complexOut, m_dspec, m_dbuf);
     }
 
-    void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) {
-        Profiler profiler("D_IPP::forwardPolar [d]");
+    void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
         if (!m_dspec) initDouble();
         ippsFFTFwd_RToCCS_64f(realIn, m_dpacked, m_dspec, m_dbuf);
         unpackDouble(m_dpacked, m_dspare);
-        Profiler profiler2("D_IPP::forwardPolar [d] conv");
         ippsCartToPolar_64f(m_dpacked, m_dspare, magOut, phaseOut, m_size/2+1);
     }
 
-    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) {
-        Profiler profiler("D_IPP::forwardMagnitude [d]");
+    void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) {
         if (!m_dspec) initDouble();
         ippsFFTFwd_RToCCS_64f(realIn, m_dpacked, m_dspec, m_dbuf);
         unpackDouble(m_dpacked, m_dspare);
         ippsMagnitude_64f(m_dpacked, m_dspare, magOut, m_size/2+1);
     }
 
-    void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) {
-        Profiler profiler("D_IPP::forward [f]");
+    void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) {
         if (!m_fspec) initFloat();
         ippsFFTFwd_RToCCS_32f(realIn, m_fpacked, m_fspec, m_fbuf);
         unpackFloat(realOut, imagOut);
     }
 
-    void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) {
-        Profiler profiler("D_IPP::forwardInterleaved [f]");
+    void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) {
         if (!m_fspec) initFloat();
         ippsFFTFwd_RToCCS_32f(realIn, complexOut, m_fspec, m_fbuf);
     }
 
-    void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) {
-        Profiler profiler("D_IPP::forwardPolar [f]");
+    void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
         if (!m_fspec) initFloat();
         ippsFFTFwd_RToCCS_32f(realIn, m_fpacked, m_fspec, m_fbuf);
         unpackFloat(m_fpacked, m_fspare);
-        Profiler profiler2("D_IPP::forwardPolar [f] conv");
         ippsCartToPolar_32f(m_fpacked, m_fspare, magOut, phaseOut, m_size/2+1);
     }
 
-    void forwardMagnitude(const float *R__ realIn, float *R__ magOut) {
-        Profiler profiler("D_IPP::forwardMagnitude [f]");
+    void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) {
         if (!m_fspec) initFloat();
         ippsFFTFwd_RToCCS_32f(realIn, m_fpacked, m_fspec, m_fbuf);
         unpackFloat(m_fpacked, m_fspare);
         ippsMagnitude_32f(m_fpacked, m_fspare, magOut, m_size/2+1);
     }
 
-    void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) {
-        Profiler profiler("D_IPP::inverse [d]");
+    void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) {
         if (!m_dspec) initDouble();
         packDouble(realIn, imagIn);
         ippsFFTInv_CCSToR_64f(m_dpacked, realOut, m_dspec, m_dbuf);
     }
 
-    void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) {
-        Profiler profiler("D_IPP::inverse [d]");
+    void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) {
         if (!m_dspec) initDouble();
         ippsFFTInv_CCSToR_64f(complexIn, realOut, m_dspec, m_dbuf);
     }
 
-    void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) {
-        Profiler profiler("D_IPP::inversePolar [d]");
+    void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) {
         if (!m_dspec) initDouble();
         ippsPolarToCart_64f(magIn, phaseIn, realOut, m_dspare, m_size/2+1);
-        Profiler profiler2("D_IPP::inversePolar [d] postconv");
         packDouble(realOut, m_dspare); // to m_dpacked
         ippsFFTInv_CCSToR_64f(m_dpacked, realOut, m_dspec, m_dbuf);
     }
 
-    void inverseCepstral(const double *R__ magIn, double *R__ cepOut) {
-        Profiler profiler("D_IPP::inverseCepstral [d]");
+    void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) {
         if (!m_dspec) initDouble();
         const int hs1 = m_size/2 + 1;
         ippsCopy_64f(magIn, m_dspare, hs1);
@@ -402,30 +381,25 @@ public:
         ippsFFTInv_CCSToR_64f(m_dpacked, cepOut, m_dspec, m_dbuf);
     }
     
-    void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) {
-        Profiler profiler("D_IPP::inverse [f]");
+    void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) {
         if (!m_fspec) initFloat();
         packFloat(realIn, imagIn);
         ippsFFTInv_CCSToR_32f(m_fpacked, realOut, m_fspec, m_fbuf);
     }
 
-    void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) {
-        Profiler profiler("D_IPP::inverse [f]");
+    void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) {
         if (!m_fspec) initFloat();
         ippsFFTInv_CCSToR_32f(complexIn, realOut, m_fspec, m_fbuf);
     }
 
-    void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) {
-        Profiler profiler("D_IPP::inversePolar [f]");
+    void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) {
         if (!m_fspec) initFloat();
         ippsPolarToCart_32f(magIn, phaseIn, realOut, m_fspare, m_size/2+1);
-        Profiler profiler2("D_IPP::inversePolar [f] postconv");
         packFloat(realOut, m_fspare); // to m_fpacked
         ippsFFTInv_CCSToR_32f(m_fpacked, realOut, m_fspec, m_fbuf);
     }
 
-    void inverseCepstral(const float *R__ magIn, float *R__ cepOut) {
-        Profiler profiler("D_IPP::inverseCepstral [f]");
+    void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) {
         if (!m_fspec) initFloat();
         const int hs1 = m_size/2 + 1;
         ippsCopy_32f(magIn, m_fspare, hs1);
@@ -495,6 +469,10 @@ public:
         }
     }
 
+    int getSize() const {
+        return m_size;
+    }
+
     FFT::Precisions
     getSupportedPrecisions() const {
         return FFT::SinglePrecision | FFT::DoublePrecision;
@@ -530,11 +508,11 @@ public:
         m_dspare2 = allocate<double>(m_size + 2);
     }
 
-    void packReal(const float *R__ const re) {
+    void packReal(const float *BQ_R__ const re) {
         // Pack input for forward transform 
         vDSP_ctoz((DSPComplex *)re, 2, m_fpacked, 1, m_size/2);
     }
-    void packComplex(const float *R__ const re, const float *R__ const im) {
+    void packComplex(const float *BQ_R__ const re, const float *BQ_R__ const im) {
         // Pack input for inverse transform 
         if (re) v_copy(m_fpacked->realp, re, m_size/2 + 1);
         else v_zero(m_fpacked->realp, m_size/2 + 1);
@@ -543,32 +521,32 @@ public:
         fnyq();
     }
 
-    void unpackReal(float *R__ const re) {
+    void unpackReal(float *BQ_R__ const re) {
         // Unpack output for inverse transform
         vDSP_ztoc(m_fpacked, 1, (DSPComplex *)re, 2, m_size/2);
     }
-    void unpackComplex(float *R__ const re, float *R__ const im) {
+    void unpackComplex(float *BQ_R__ const re, float *BQ_R__ const im) {
         // Unpack output for forward transform
         // vDSP forward FFTs are scaled 2x (for some reason)
         float two = 2.f;
         vDSP_vsdiv(m_fpacked->realp, 1, &two, re, 1, m_size/2 + 1);
         vDSP_vsdiv(m_fpacked->imagp, 1, &two, im, 1, m_size/2 + 1);
     }
-    void unpackComplex(float *R__ const cplx) {
+    void unpackComplex(float *BQ_R__ const cplx) {
         // Unpack output for forward transform
         // vDSP forward FFTs are scaled 2x (for some reason)
         const int hs1 = m_size/2 + 1;
         for (int i = 0; i < hs1; ++i) {
-            cplx[i*2] = m_fpacked->realp[i] / 2.f;
-            cplx[i*2+1] = m_fpacked->imagp[i] / 2.f;
+            cplx[i*2] = m_fpacked->realp[i] * 0.5f;
+            cplx[i*2+1] = m_fpacked->imagp[i] * 0.5f;
         }
     }
 
-    void packReal(const double *R__ const re) {
+    void packReal(const double *BQ_R__ const re) {
         // Pack input for forward transform
         vDSP_ctozD((DSPDoubleComplex *)re, 2, m_dpacked, 1, m_size/2);
     }
-    void packComplex(const double *R__ const re, const double *R__ const im) {
+    void packComplex(const double *BQ_R__ const re, const double *BQ_R__ const im) {
         // Pack input for inverse transform
         if (re) v_copy(m_dpacked->realp, re, m_size/2 + 1);
         else v_zero(m_dpacked->realp, m_size/2 + 1);
@@ -577,24 +555,24 @@ public:
         dnyq();
     }
 
-    void unpackReal(double *R__ const re) {
+    void unpackReal(double *BQ_R__ const re) {
         // Unpack output for inverse transform
         vDSP_ztocD(m_dpacked, 1, (DSPDoubleComplex *)re, 2, m_size/2);
     }
-    void unpackComplex(double *R__ const re, double *R__ const im) {
+    void unpackComplex(double *BQ_R__ const re, double *BQ_R__ const im) {
         // Unpack output for forward transform
         // vDSP forward FFTs are scaled 2x (for some reason)
         double two = 2.0;
         vDSP_vsdivD(m_dpacked->realp, 1, &two, re, 1, m_size/2 + 1);
         vDSP_vsdivD(m_dpacked->imagp, 1, &two, im, 1, m_size/2 + 1);
     }
-    void unpackComplex(double *R__ const cplx) {
+    void unpackComplex(double *BQ_R__ const cplx) {
         // Unpack output for forward transform
         // vDSP forward FFTs are scaled 2x (for some reason)
         const int hs1 = m_size/2 + 1;
         for (int i = 0; i < hs1; ++i) {
-            cplx[i*2] = m_dpacked->realp[i] / 2.0;
-            cplx[i*2+1] = m_dpacked->imagp[i] / 2.0;
+            cplx[i*2] = m_dpacked->realp[i] * 0.5;
+            cplx[i*2+1] = m_dpacked->imagp[i] * 0.5;
         }
     }
 
@@ -628,8 +606,7 @@ public:
         m_dpacked->imagp[hs] = 0.;
     }
 
-    void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) {
-        Profiler profiler("D_VDSP::forward [d]");
+    void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) {
         if (!m_dspec) initDouble();
         packReal(realIn);
         vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_FORWARD);
@@ -637,8 +614,7 @@ public:
         unpackComplex(realOut, imagOut);
     }
 
-    void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) {
-        Profiler profiler("D_VDSP::forward [d]");
+    void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) {
         if (!m_dspec) initDouble();
         packReal(realIn);
         vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_FORWARD);
@@ -646,22 +622,20 @@ public:
         unpackComplex(complexOut);
     }
 
-    void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) {
-        Profiler profiler("D_VDSP::forwardPolar [d]");
+    void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
         if (!m_dspec) initDouble();
         const int hs1 = m_size/2+1;
         packReal(realIn);
         vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_FORWARD);
         ddenyq();
         // vDSP forward FFTs are scaled 2x (for some reason)
-        for (int i = 0; i < hs1; ++i) m_dpacked->realp[i] /= 2.0;
-        for (int i = 0; i < hs1; ++i) m_dpacked->imagp[i] /= 2.0;
+        for (int i = 0; i < hs1; ++i) m_dpacked->realp[i] *= 0.5;
+        for (int i = 0; i < hs1; ++i) m_dpacked->imagp[i] *= 0.5;
         v_cartesian_to_polar(magOut, phaseOut,
                              m_dpacked->realp, m_dpacked->imagp, hs1);
     }
 
-    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) {
-        Profiler profiler("D_VDSP::forwardMagnitude [d]");
+    void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) {
         if (!m_dspec) initDouble();
         packReal(realIn);
         vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_FORWARD);
@@ -674,8 +648,7 @@ public:
         vDSP_vsdivD(m_dspare2, 1, &two, magOut, 1, hs1);
     }
 
-    void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) {
-        Profiler profiler("D_VDSP::forward [f]");
+    void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) {
         if (!m_fspec) initFloat();
         packReal(realIn);
         vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_FORWARD);
@@ -683,8 +656,7 @@ public:
         unpackComplex(realOut, imagOut);
     }
 
-    void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) {
-        Profiler profiler("D_VDSP::forward [f]");
+    void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) {
         if (!m_fspec) initFloat();
         packReal(realIn);
         vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_FORWARD);
@@ -692,22 +664,20 @@ public:
         unpackComplex(complexOut);
     }
 
-    void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) {
-        Profiler profiler("D_VDSP::forwardPolar [f]");
+    void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
         if (!m_fspec) initFloat();
         const int hs1 = m_size/2+1;
         packReal(realIn);
         vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_FORWARD);
         fdenyq();
         // vDSP forward FFTs are scaled 2x (for some reason)
-        for (int i = 0; i < hs1; ++i) m_fpacked->realp[i] /= 2.f;
-        for (int i = 0; i < hs1; ++i) m_fpacked->imagp[i] /= 2.f;
+        for (int i = 0; i < hs1; ++i) m_fpacked->realp[i] *= 0.5f;
+        for (int i = 0; i < hs1; ++i) m_fpacked->imagp[i] *= 0.5f;
         v_cartesian_to_polar(magOut, phaseOut,
                              m_fpacked->realp, m_fpacked->imagp, hs1);
     }
 
-    void forwardMagnitude(const float *R__ realIn, float *R__ magOut) {
-        Profiler profiler("D_VDSP::forwardMagnitude [f]");
+    void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) {
         if (!m_fspec) initFloat();
         packReal(realIn);
         vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_FORWARD);
@@ -720,16 +690,14 @@ public:
         vDSP_vsdiv(m_fspare2, 1, &two, magOut, 1, hs1);
     }
 
-    void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) {
-        Profiler profiler("D_VDSP::inverse [d]");
+    void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) {
         if (!m_dspec) initDouble();
         packComplex(realIn, imagIn);
         vDSP_fft_zriptD(m_dspec, m_dpacked, 1, m_dbuf, m_order, FFT_INVERSE);
         unpackReal(realOut);
     }
 
-    void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) {
-        Profiler profiler("D_VDSP::inverseInterleaved [d]");
+    void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) {
         if (!m_dspec) initDouble();
         double *d[2] = { m_dpacked->realp, m_dpacked->imagp };
         v_deinterleave(d, complexIn, 2, m_size/2 + 1);
@@ -737,8 +705,7 @@ public:
         unpackReal(realOut);
     }
 
-    void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) {
-        Profiler profiler("D_VDSP::inversePolar [d]");
+    void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) {
         if (!m_dspec) initDouble();
         const int hs1 = m_size/2+1;
         vvsincos(m_dpacked->imagp, m_dpacked->realp, phaseIn, &hs1);
@@ -751,8 +718,7 @@ public:
         unpackReal(realOut);
     }
 
-    void inverseCepstral(const double *R__ magIn, double *R__ cepOut) {
-        Profiler profiler("D_VDSP::inverseCepstral [d]");
+    void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) {
         if (!m_dspec) initDouble();
         const int hs1 = m_size/2 + 1;
         v_copy(m_dspare, magIn, hs1);
@@ -761,16 +727,14 @@ public:
         inverse(m_dspare2, 0, cepOut);
     }
     
-    void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) {
-        Profiler profiler("D_VDSP::inverse [f]");
+    void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) {
         if (!m_fspec) initFloat();
         packComplex(realIn, imagIn);
         vDSP_fft_zript(m_fspec, m_fpacked, 1, m_fbuf, m_order, FFT_INVERSE);
         unpackReal(realOut);
     }
 
-    void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) {
-        Profiler profiler("D_VDSP::inverseInterleaved [f]");
+    void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) {
         if (!m_fspec) initFloat();
         float *f[2] = { m_fpacked->realp, m_fpacked->imagp };
         v_deinterleave(f, complexIn, 2, m_size/2 + 1);
@@ -778,8 +742,7 @@ public:
         unpackReal(realOut);
     }
 
-    void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) {
-        Profiler profiler("D_VDSP::inversePolar [f]");
+    void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) {
         if (!m_fspec) initFloat();
 
         const int hs1 = m_size/2+1;
@@ -793,8 +756,7 @@ public:
         unpackReal(realOut);
     }
 
-    void inverseCepstral(const float *R__ magIn, float *R__ cepOut) {
-        Profiler profiler("D_VDSP::inverseCepstral [f]");
+    void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) {
         if (!m_fspec) initFloat();
         const int hs1 = m_size/2 + 1;
         v_copy(m_fspare, magIn, hs1);
@@ -820,716 +782,6 @@ private:
 
 #endif /* HAVE_VDSP */
 
-#ifdef HAVE_MEDIALIB
-
-class D_MEDIALIB : public FFTImpl
-{
-public:
-    D_MEDIALIB(int size) :
-        m_size(size),
-        m_dpacked(0), m_fpacked(0)
-    { 
-        for (int i = 0; ; ++i) {
-            if (m_size & (1 << i)) {
-                m_order = i;
-                break;
-            }
-        }
-    }
-
-    ~D_MEDIALIB() {
-        if (m_dpacked) {
-            deallocate(m_dpacked);
-        }
-        if (m_fpacked) {
-            deallocate(m_fpacked);
-        }
-    }
-
-    FFT::Precisions
-    getSupportedPrecisions() const {
-        return FFT::SinglePrecision | FFT::DoublePrecision;
-    }
-
-    //!!! rv check
-
-    void initFloat() {
-        m_fpacked = allocate<float>(m_size*2);
-    }
-
-    void initDouble() {
-        m_dpacked = allocate<double>(m_size*2);
-    }
-
-    void packFloatConjugates() {
-        const int hs = m_size / 2;
-        for (int i = 1; i <= hs; ++i) {
-            m_fpacked[(m_size-i)*2] = m_fpacked[2*i];
-            m_fpacked[(m_size-i)*2 + 1] = -m_fpacked[2*i + 1];
-        }
-    }
-
-    void packDoubleConjugates() {
-        const int hs = m_size / 2;
-        for (int i = 1; i <= hs; ++i) {
-            m_dpacked[(m_size-i)*2] = m_dpacked[2*i];
-            m_dpacked[(m_size-i)*2 + 1] = -m_dpacked[2*i + 1];
-        }
-    }
-
-    void packFloat(const float *R__ re, const float *R__ im) {
-        int index = 0;
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_fpacked[index++] = re[i];
-            index++;
-        }
-        index = 0;
-        if (im) {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                m_fpacked[index++] = im[i];
-            }
-        } else {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                m_fpacked[index++] = 0.f;
-            }
-        }
-        packFloatConjugates();
-    }
-
-    void packDouble(const double *R__ re, const double *R__ im) {
-        int index = 0;
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_dpacked[index++] = re[i];
-            index++;
-        }
-        index = 0;
-        if (im) {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                m_dpacked[index++] = im[i];
-            }
-        } else {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                m_dpacked[index++] = 0.0;
-            }
-        }
-        packDoubleConjugates();
-    }
-
-    void unpackFloat(float *re, float *R__ im) { // re may be equal to m_fpacked
-        int index = 0;
-        const int hs = m_size/2;
-        if (im) {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                im[i] = m_fpacked[index++];
-            }
-        }
-        index = 0;
-        for (int i = 0; i <= hs; ++i) {
-            re[i] = m_fpacked[index++];
-            index++;
-        }
-    }        
-
-    void unpackDouble(double *re, double *R__ im) { // re may be equal to m_dpacked
-        int index = 0;
-        const int hs = m_size/2;
-        if (im) {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                im[i] = m_dpacked[index++];
-            }
-        }
-        index = 0;
-        for (int i = 0; i <= hs; ++i) {
-            re[i] = m_dpacked[index++];
-            index++;
-        }
-    }
-
-    void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) {
-        Profiler profiler("D_MEDIALIB::forward [d]");
-        if (!m_dpacked) initDouble();
-        mlib_SignalFFT_1_D64C_D64(m_dpacked, realIn, m_order);
-        unpackDouble(realOut, imagOut);
-    }
-
-    void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) {
-        Profiler profiler("D_MEDIALIB::forwardInterleaved [d]");
-        if (!m_dpacked) initDouble();
-        // mlib FFT gives the whole redundant complex result
-        mlib_SignalFFT_1_D64C_D64(m_dpacked, realIn, m_order);
-        v_copy(complexOut, m_dpacked, m_size + 2);
-    }
-
-    void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) {
-        Profiler profiler("D_MEDIALIB::forwardPolar [d]");
-        if (!m_dpacked) initDouble();
-        mlib_SignalFFT_1_D64C_D64(m_dpacked, realIn, m_order);
-        const int hs = m_size/2;
-        int index = 0;
-        for (int i = 0; i <= hs; ++i) {
-            int reali = index;
-            ++index;
-            magOut[i] = sqrt(m_dpacked[reali] * m_dpacked[reali] +
-                             m_dpacked[index] * m_dpacked[index]);
-            phaseOut[i] = atan2(m_dpacked[index], m_dpacked[reali]) ;
-            ++index;
-        }
-    }
-
-    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) {
-        Profiler profiler("D_MEDIALIB::forwardMagnitude [d]");
-        if (!m_dpacked) initDouble();
-        mlib_SignalFFT_1_D64C_D64(m_dpacked, realIn, m_order);
-        const int hs = m_size/2;
-        int index = 0;
-        for (int i = 0; i <= hs; ++i) {
-            int reali = index;
-            ++index;
-            magOut[i] = sqrt(m_dpacked[reali] * m_dpacked[reali] +
-                             m_dpacked[index] * m_dpacked[index]);
-            ++index;
-        }
-    }
-
-    void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) {
-        Profiler profiler("D_MEDIALIB::forward [f]");
-        if (!m_fpacked) initFloat();
-        mlib_SignalFFT_1_F32C_F32(m_fpacked, realIn, m_order);
-        unpackFloat(realOut, imagOut);
-    }
-
-    void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) {
-        Profiler profiler("D_MEDIALIB::forwardInterleaved [f]");
-        if (!m_fpacked) initFloat();
-        // mlib FFT gives the whole redundant complex result
-        mlib_SignalFFT_1_F32C_F32(m_fpacked, realIn, m_order);
-        v_copy(complexOut, m_fpacked, m_size + 2);
-    }
-
-    void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) {
-        Profiler profiler("D_MEDIALIB::forwardPolar [f]");
-        if (!m_fpacked) initFloat();
-        mlib_SignalFFT_1_F32C_F32(m_fpacked, realIn, m_order);
-        const int hs = m_size/2;
-        int index = 0;
-        for (int i = 0; i <= hs; ++i) {
-            int reali = index;
-            ++index;
-            magOut[i] = sqrtf(m_fpacked[reali] * m_fpacked[reali] +
-                              m_fpacked[index] * m_fpacked[index]);
-            phaseOut[i] = atan2f(m_fpacked[index], m_fpacked[reali]);
-            ++index;
-        }
-    }
-
-    void forwardMagnitude(const float *R__ realIn, float *R__ magOut) {
-        Profiler profiler("D_MEDIALIB::forwardMagnitude [f]");
-        if (!m_fpacked) initFloat();
-        mlib_SignalFFT_1_F32C_F32(m_fpacked, realIn, m_order);
-        const int hs = m_size/2;
-        int index = 0;
-        for (int i = 0; i <= hs; ++i) {
-            int reali = index;
-            ++index;
-            magOut[i] = sqrtf(m_fpacked[reali] * m_fpacked[reali] +
-                              m_fpacked[index] * m_fpacked[index]);
-            ++index;
-        }
-    }
-
-    void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) {
-        Profiler profiler("D_MEDIALIB::inverse [d]");
-        if (!m_dpacked) initDouble();
-        packDouble(realIn, imagIn);
-        mlib_SignalIFFT_2_D64_D64C(realOut, m_dpacked, m_order);
-    }
-
-    void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) {
-        Profiler profiler("D_MEDIALIB::inverseInterleaved [d]");
-        if (!m_dpacked) initDouble();
-        v_copy(m_dpacked, complexIn, m_size + 2);
-        packDoubleConjugates();
-        mlib_SignalIFFT_2_D64_D64C(realOut, m_dpacked, m_order);
-    }
-
-    void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) {
-        Profiler profiler("D_MEDIALIB::inversePolar [d]");
-        if (!m_dpacked) initDouble();
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            double real = magIn[i] * cos(phaseIn[i]);
-            double imag = magIn[i] * sin(phaseIn[i]);
-            m_dpacked[i*2] = real;
-            m_dpacked[i*2 + 1] = imag;
-        }
-        packDoubleConjugates();
-        mlib_SignalIFFT_2_D64_D64C(realOut, m_dpacked, m_order);
-    }
-
-    void inverseCepstral(const double *R__ magIn, double *R__ cepOut) {
-        Profiler profiler("D_MEDIALIB::inverseCepstral [d]");
-        if (!m_dpacked) initDouble();
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_dpacked[i*2] = log(magIn[i] + 0.000001);
-            m_dpacked[i*2 + 1] = 0.0;
-        }
-        packDoubleConjugates();
-        mlib_SignalIFFT_2_D64_D64C(cepOut, m_dpacked, m_order);
-    }
-    
-    void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) {
-        Profiler profiler("D_MEDIALIB::inverse [f]");
-        if (!m_fpacked) initFloat();
-        packFloat(realIn, imagIn);
-        mlib_SignalIFFT_2_F32_F32C(realOut, m_fpacked, m_order);
-    }
-    
-    void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) {
-        Profiler profiler("D_MEDIALIB::inverseInterleaved [f]");
-        if (!m_fpacked) initFloat();
-        v_convert(m_fpacked, complexIn, m_size + 2);
-        packFloatConjugates();
-        mlib_SignalIFFT_2_F32_F32C(realOut, m_fpacked, m_order);
-    }
-
-    void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) {
-        Profiler profiler("D_MEDIALIB::inversePolar [f]");
-        if (!m_fpacked) initFloat();
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            double real = magIn[i] * cos(phaseIn[i]);
-            double imag = magIn[i] * sin(phaseIn[i]);
-            m_fpacked[i*2] = real;
-            m_fpacked[i*2 + 1] = imag;
-        }
-        packFloatConjugates();
-        mlib_SignalIFFT_2_F32_F32C(realOut, m_fpacked, m_order);
-    }
-
-    void inverseCepstral(const float *R__ magIn, float *R__ cepOut) {
-        Profiler profiler("D_MEDIALIB::inverseCepstral [f]");
-        if (!m_fpacked) initFloat();
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_fpacked[i*2] = logf(magIn[i] + 0.000001);
-            m_fpacked[i*2 + 1] = 0.f;
-        }
-        packFloatConjugates();
-        mlib_SignalIFFT_2_F32_F32C(cepOut, m_fpacked, m_order);
-    }
-
-private:
-    const int m_size;
-    int m_order;
-    double *m_dpacked;
-    float *m_fpacked;
-};
-
-#endif /* HAVE_MEDIALIB */
-
-#ifdef HAVE_OPENMAX
-
-class D_OPENMAX : public FFTImpl
-{
-    // Convert a signed 32-bit integer to a float in the range [-1,1)
-    static inline float i2f(OMX_S32 i)
-    {
-        return float(i) / float(OMX_MAX_S32);
-    }
-
-    // Convert a signed 32-bit integer to a double in the range [-1,1)
-    static inline double i2d(OMX_S32 i)
-    {
-        return double(i) / double(OMX_MAX_S32);
-    }
-
-    // Convert a float in the range [-1,1) to a signed 32-bit integer
-    static inline OMX_S32 f2i(float f)
-    {
-        return OMX_S32(f * OMX_MAX_S32);
-    }
-
-    // Convert a double in the range [-1,1) to a signed 32-bit integer
-    static inline OMX_S32 d2i(double d)
-    {
-        return OMX_S32(d * OMX_MAX_S32);
-    }
-
-public:
-    D_OPENMAX(int size) :
-        m_size(size),
-        m_packed(0)
-    { 
-        for (int i = 0; ; ++i) {
-            if (m_size & (1 << i)) {
-                m_order = i;
-                break;
-            }
-        }
-    }
-
-    ~D_OPENMAX() {
-        if (m_packed) {
-            deallocate(m_packed);
-            deallocate(m_buf);
-            deallocate(m_fbuf);
-            deallocate(m_spec);
-        }
-    }
-
-    FFT::Precisions
-    getSupportedPrecisions() const {
-        return FFT::SinglePrecision;
-    }
-
-    //!!! rv check
-
-    // The OpenMAX implementation uses a fixed-point representation in
-    // 32-bit signed integers, with a downward scaling factor (0-32
-    // bits) supplied as an argument to the FFT function.
-
-    void initFloat() {
-        initDouble();
-    }
-
-    void initDouble() {
-        if (!m_packed) {
-            m_buf = allocate<OMX_S32>(m_size);
-            m_packed = allocate<OMX_S32>(m_size*2 + 2);
-            m_fbuf = allocate<float>(m_size*2 + 2);
-            OMX_INT sz = 0;
-            omxSP_FFTGetBufSize_R_S32(m_order, &sz);
-            m_spec = (OMXFFTSpec_R_S32 *)allocate<char>(sz);
-            omxSP_FFTInit_R_S32(m_spec, m_order);
-        }
-    }
-
-    void packFloat(const float *R__ re) {
-        // prepare fixed point input for forward transform
-        for (int i = 0; i < m_size; ++i) {
-            m_buf[i] = f2i(re[i]);
-        }
-    }
-
-    void packDouble(const double *R__ re) {
-        // prepare fixed point input for forward transform
-        for (int i = 0; i < m_size; ++i) {
-            m_buf[i] = d2i(re[i]);
-        }
-    }
-
-    void unpackFloat(float *R__ re, float *R__ im) {
-        // convert fixed point output for forward transform
-        int index = 0;
-        const int hs = m_size/2;
-        if (im) {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                im[i] = i2f(m_packed[index++]);
-            }
-            v_scale(im, m_size, hs + 1);
-        }
-        index = 0;
-        for (int i = 0; i <= hs; ++i) {
-            re[i] = i2f(m_packed[index++]);
-            index++;
-        }
-        v_scale(re, m_size, hs + 1);
-    }        
-
-    void unpackDouble(double *R__ re, double *R__ im) {
-        // convert fixed point output for forward transform
-        int index = 0;
-        const int hs = m_size/2;
-        if (im) {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                im[i] = i2d(m_packed[index++]);
-            }
-            v_scale(im, m_size, hs + 1);
-        }
-        index = 0;
-        for (int i = 0; i <= hs; ++i) {
-            re[i] = i2d(m_packed[index++]);
-            index++;
-        }
-        v_scale(re, m_size, hs + 1);
-    }
-
-    void unpackFloatInterleaved(float *R__ cplx) {
-        // convert fixed point output for forward transform
-        for (int i = 0; i < m_size + 2; ++i) {
-            cplx[i] = i2f(m_packed[i]);
-        }            
-        v_scale(cplx, m_size, m_size + 2);
-    }
-
-    void unpackDoubleInterleaved(double *R__ cplx) {
-        // convert fixed point output for forward transform
-        for (int i = 0; i < m_size + 2; ++i) {
-            cplx[i] = i2d(m_packed[i]);
-        }            
-        v_scale(cplx, m_size, m_size + 2);
-    }
-
-    void packFloat(const float *R__ re, const float *R__ im) {
-        // prepare fixed point input for inverse transform
-        int index = 0;
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_packed[index++] = f2i(re[i]);
-            index++;
-        }
-        index = 0;
-        if (im) {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                m_packed[index++] = f2i(im[i]);
-            }
-        } else {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                m_packed[index++] = 0;
-            }
-        }
-    }
-
-    void packDouble(const double *R__ re, const double *R__ im) {
-        // prepare fixed point input for inverse transform
-        int index = 0;
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_packed[index++] = d2i(re[i]);
-            index++;
-        }
-        index = 0;
-        if (im) {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                m_packed[index++] = d2i(im[i]);
-            }
-        } else {
-            for (int i = 0; i <= hs; ++i) {
-                index++;
-                m_packed[index++] = 0;
-            }
-        }
-    }
-
-    void convertFloat(const float *R__ f) {
-        // convert interleaved input for inverse interleaved transform
-        const int n = m_size + 2;
-        for (int i = 0; i < n; ++i) {
-            m_packed[i] = f2i(f[i]);
-        }
-    }        
-
-    void convertDouble(const double *R__ d) {
-        // convert interleaved input for inverse interleaved transform
-        const int n = m_size + 2;
-        for (int i = 0; i < n; ++i) {
-            m_packed[i] = d2i(d[i]);
-        }
-    }        
-
-    void unpackFloat(float *R__ re) {
-        // convert fixed point output for inverse transform
-        for (int i = 0; i < m_size; ++i) {
-            re[i] = i2f(m_buf[i]) * m_size;
-        }
-    }
-
-    void unpackDouble(double *R__ re) {
-        // convert fixed point output for inverse transform
-        for (int i = 0; i < m_size; ++i) {
-            re[i] = i2d(m_buf[i]) * m_size;
-        }
-    }
-
-    void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) {
-        Profiler profiler("D_OPENMAX::forward [d]");
-        if (!m_packed) initDouble();
-        packDouble(realIn);
-        omxSP_FFTFwd_RToCCS_S32_Sfs(m_buf, m_packed, m_spec, m_order);
-        unpackDouble(realOut, imagOut);
-    }
-    
-    void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) {
-        Profiler profiler("D_OPENMAX::forwardInterleaved [d]");
-        if (!m_packed) initDouble();
-        packDouble(realIn);
-        omxSP_FFTFwd_RToCCS_S32_Sfs(m_buf, m_packed, m_spec, m_order);
-        unpackDoubleInterleaved(complexOut);
-    }
-
-    void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) {
-        Profiler profiler("D_OPENMAX::forwardPolar [d]");
-        if (!m_packed) initDouble();
-        packDouble(realIn);
-        omxSP_FFTFwd_RToCCS_S32_Sfs(m_buf, m_packed, m_spec, m_order);
-        unpackDouble(magOut, phaseOut); // temporarily
-        // at this point we actually have real/imag in the mag/phase arrays
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            double real = magOut[i];
-            double imag = phaseOut[i];
-            c_magphase(magOut + i, phaseOut + i, real, imag);
-        }
-    }
-
-    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) {
-        Profiler profiler("D_OPENMAX::forwardMagnitude [d]");
-        if (!m_packed) initDouble();
-        packDouble(realIn);
-        omxSP_FFTFwd_RToCCS_S32_Sfs(m_buf, m_packed, m_spec, m_order);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            int reali = i * 2;
-            int imagi = reali + 1;
-            double real = i2d(m_packed[reali]) * m_size;
-            double imag = i2d(m_packed[imagi]) * m_size;
-            magOut[i] = sqrt(real * real + imag * imag);
-        }
-    }
-
-    void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) {
-        Profiler profiler("D_OPENMAX::forward [f]");
-        if (!m_packed) initFloat();
-        packFloat(realIn);
-        omxSP_FFTFwd_RToCCS_S32_Sfs(m_buf, m_packed, m_spec, m_order);
-        unpackFloat(realOut, imagOut);
-    }
-
-    void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) {
-        Profiler profiler("D_OPENMAX::forwardInterleaved [f]");
-        if (!m_packed) initFloat();
-        packFloat(realIn);
-        omxSP_FFTFwd_RToCCS_S32_Sfs(m_buf, m_packed, m_spec, m_order);
-        unpackFloatInterleaved(complexOut);
-    }
-
-    void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) {
-        Profiler profiler("D_OPENMAX::forwardPolar [f]");
-        if (!m_packed) initFloat();
-
-        packFloat(realIn);
-        omxSP_FFTFwd_RToCCS_S32_Sfs(m_buf, m_packed, m_spec, m_order);
-        unpackFloat(magOut, phaseOut); // temporarily
-        // at this point we actually have real/imag in the mag/phase arrays
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            float real = magOut[i];
-            float imag = phaseOut[i];
-            c_magphase(magOut + i, phaseOut + i, real, imag);
-        }
-    }
-
-    void forwardMagnitude(const float *R__ realIn, float *R__ magOut) {
-        Profiler profiler("D_OPENMAX::forwardMagnitude [f]");
-        if (!m_packed) initFloat();
-        packFloat(realIn);
-        omxSP_FFTFwd_RToCCS_S32_Sfs(m_buf, m_packed, m_spec, m_order);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            int reali = i * 2;
-            int imagi = reali + 1;
-            float real = i2f(m_packed[reali]) * m_size;
-            float imag = i2f(m_packed[imagi]) * m_size;
-            magOut[i] = sqrtf(real * real + imag * imag);
-        }
-    }
-
-    void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) {
-        Profiler profiler("D_OPENMAX::inverse [d]");
-        if (!m_packed) initDouble();
-        packDouble(realIn, imagIn);
-        omxSP_FFTInv_CCSToR_S32_Sfs(m_packed, m_buf, m_spec, 0);
-        unpackDouble(realOut);
-    }
-
-    void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) {
-        Profiler profiler("D_OPENMAX::inverseInterleaved [d]");
-        if (!m_packed) initDouble();
-        convertDouble(complexIn);
-        omxSP_FFTInv_CCSToR_S32_Sfs(m_packed, m_buf, m_spec, 0);
-        unpackDouble(realOut);
-    }
-
-    void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) {
-        Profiler profiler("D_OPENMAX::inversePolar [d]");
-        if (!m_packed) initDouble();
-        int index = 0;
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            double real, imag;
-            c_phasor(&real, &imag, phaseIn[i]);
-            m_fbuf[index++] = float(real);
-            m_fbuf[index++] = float(imag);
-        }
-        convertFloat(m_fbuf);
-        omxSP_FFTInv_CCSToR_S32_Sfs(m_packed, m_buf, m_spec, 0);
-        unpackDouble(realOut);
-    }
-
-    void inverseCepstral(const double *R__ magIn, double *R__ cepOut) {
-        Profiler profiler("D_OPENMAX::inverseCepstral [d]");
-        if (!m_packed) initDouble();
-        //!!! implement
-    }
-    
-    void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) {
-        Profiler profiler("D_OPENMAX::inverse [f]");
-        if (!m_packed) initFloat();
-        packFloat(realIn, imagIn);
-        omxSP_FFTInv_CCSToR_S32_Sfs(m_packed, m_buf, m_spec, 0);
-        unpackFloat(realOut);
-    }
-
-    void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) {
-        Profiler profiler("D_OPENMAX::inverse [f]");
-        if (!m_packed) initFloat();
-        convertFloat(complexIn);
-        omxSP_FFTInv_CCSToR_S32_Sfs(m_packed, m_buf, m_spec, 0);
-        unpackFloat(realOut);
-    }
-
-    void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) {
-        Profiler profiler("D_OPENMAX::inversePolar [f]");
-        if (!m_packed) initFloat();
-        const int hs = m_size/2;
-        v_polar_to_cartesian_interleaved(m_fbuf, magIn, phaseIn, hs+1);
-        convertFloat(m_fbuf);
-        omxSP_FFTInv_CCSToR_S32_Sfs(m_packed, m_buf, m_spec, 0);
-        unpackFloat(realOut);
-    }
-
-    void inverseCepstral(const float *R__ magIn, float *R__ cepOut) {
-        Profiler profiler("D_OPENMAX::inverseCepstral [f]");
-        if (!m_packed) initFloat();
-        //!!! implement
-    }
-
-private:
-    const int m_size;
-    int m_order;
-    OMX_S32 *m_packed;
-    OMX_S32 *m_buf;
-    float *m_fbuf;
-    OMXFFTSpec_R_S32 *m_spec;
-
-};
-
-#endif /* HAVE_OPENMAX */
-
 #ifdef HAVE_FFTW3
 
 /*
@@ -1568,7 +820,6 @@ private:
 #define fftwf_destroy_plan fftw_destroy_plan
 #define fftwf_malloc fftw_malloc
 #define fftwf_free fftw_free
-#define fftwf_cleanup fftw_cleanup
 #define fftwf_execute fftw_execute
 #define atan2f atan2
 #define sqrtf sqrt
@@ -1587,7 +838,6 @@ private:
 #define fftw_destroy_plan fftwf_destroy_plan
 #define fftw_malloc fftwf_malloc
 #define fftw_free fftwf_free
-#define fftw_cleanup fftwf_cleanup
 #define fftw_execute fftwf_execute
 #define atan2 atan2f
 #define sqrt sqrtf
@@ -1607,50 +857,51 @@ public:
 
     ~D_FFTW() {
         if (m_fplanf) {
-#ifndef NO_THREADING
-            m_commonMutex.lock();
-#endif
+            lock();
             bool save = false;
             if (m_extantf > 0 && --m_extantf == 0) save = true;
-            (void)save;
+            (void)save; // avoid compiler warning
+#ifdef USE_FFTW_WISDOM
 #ifndef FFTW_DOUBLE_ONLY
             if (save) saveWisdom('f');
+#endif
 #endif
             fftwf_destroy_plan(m_fplanf);
             fftwf_destroy_plan(m_fplani);
             fftwf_free(m_fbuf);
             fftwf_free(m_fpacked);
-#ifndef NO_THREADING
-            m_commonMutex.unlock();
-#endif
+            unlock();
         }
         if (m_dplanf) {
-#ifndef NO_THREADING
-            m_commonMutex.lock();
-#endif
+            lock();
             bool save = false;
             if (m_extantd > 0 && --m_extantd == 0) save = true;
-            (void)save;
+            (void)save; // avoid compiler warning
+#ifdef USE_FFTW_WISDOM
 #ifndef FFTW_SINGLE_ONLY
             if (save) saveWisdom('d');
+#endif
 #endif
             fftw_destroy_plan(m_dplanf);
             fftw_destroy_plan(m_dplani);
             fftw_free(m_dbuf);
             fftw_free(m_dpacked);
-#ifndef NO_THREADING
-            m_commonMutex.unlock();
-#endif
+            unlock();
         }
-#ifndef NO_THREADING
-        m_commonMutex.lock();
-#endif
+        lock();
         if (m_extantf <= 0 && m_extantd <= 0) {
-            fftw_cleanup();
-        }
-#ifndef NO_THREADING
-        m_commonMutex.unlock();
+#ifndef FFTW_DOUBLE_ONLY
+            fftwf_cleanup();
 #endif
+#ifndef FFTW_SINGLE_ONLY
+            fftw_cleanup();
+#endif
+        }
+        unlock();
+    }
+
+    int getSize() const {
+        return m_size;
     }
 
     FFT::Precisions
@@ -1669,56 +920,68 @@ public:
     void initFloat() {
         if (m_fplanf) return;
         bool load = false;
-#ifndef NO_THREADING
-        m_commonMutex.lock();
-#endif
+        lock();
         if (m_extantf++ == 0) load = true;
+        (void)load; // avoid compiler warning
+#ifdef USE_FFTW_WISDOM
 #ifdef FFTW_DOUBLE_ONLY
         if (load) loadWisdom('d');
 #else
         if (load) loadWisdom('f');
+#endif
 #endif
         m_fbuf = (fft_float_type *)fftw_malloc(m_size * sizeof(fft_float_type));
         m_fpacked = (fftwf_complex *)fftw_malloc
             ((m_size/2 + 1) * sizeof(fftwf_complex));
+#ifdef USE_FFTW_WISDOM
         m_fplanf = fftwf_plan_dft_r2c_1d
             (m_size, m_fbuf, m_fpacked, FFTW_MEASURE);
         m_fplani = fftwf_plan_dft_c2r_1d
             (m_size, m_fpacked, m_fbuf, FFTW_MEASURE);
-#ifndef NO_THREADING
-        m_commonMutex.unlock();
+#else
+        m_fplanf = fftwf_plan_dft_r2c_1d
+            (m_size, m_fbuf, m_fpacked, FFTW_ESTIMATE);
+        m_fplani = fftwf_plan_dft_c2r_1d
+            (m_size, m_fpacked, m_fbuf, FFTW_ESTIMATE);
 #endif
+        unlock();
     }
 
     void initDouble() {
         if (m_dplanf) return;
         bool load = false;
-#ifndef NO_THREADING
-        m_commonMutex.lock();
-#endif
+        lock();
         if (m_extantd++ == 0) load = true;
+        (void)load; // avoid compiler warning
+#ifdef USE_FFTW_WISDOM
 #ifdef FFTW_SINGLE_ONLY
         if (load) loadWisdom('f');
 #else
         if (load) loadWisdom('d');
+#endif
 #endif
         m_dbuf = (fft_double_type *)fftw_malloc(m_size * sizeof(fft_double_type));
         m_dpacked = (fftw_complex *)fftw_malloc
             ((m_size/2 + 1) * sizeof(fftw_complex));
+#ifdef USE_FFTW_WISDOM
         m_dplanf = fftw_plan_dft_r2c_1d
             (m_size, m_dbuf, m_dpacked, FFTW_MEASURE);
         m_dplani = fftw_plan_dft_c2r_1d
             (m_size, m_dpacked, m_dbuf, FFTW_MEASURE);
-#ifndef NO_THREADING
-        m_commonMutex.unlock();
+#else
+        m_dplanf = fftw_plan_dft_r2c_1d
+            (m_size, m_dbuf, m_dpacked, FFTW_ESTIMATE);
+        m_dplani = fftw_plan_dft_c2r_1d
+            (m_size, m_dpacked, m_dbuf, FFTW_ESTIMATE);
 #endif
+        unlock();
     }
 
     void loadWisdom(char type) { wisdom(false, type); }
     void saveWisdom(char type) { wisdom(true, type); }
 
     void wisdom(bool save, char type) {
-
+#ifdef USE_FFTW_WISDOM
 #ifdef FFTW_DOUBLE_ONLY
         if (type == 'f') return;
 #endif
@@ -1730,7 +993,7 @@ public:
         if (!home) return;
 
         char fn[256];
-        snprintf(fn, 256, "%s/%s.%c", home, ".rubberband.wisdom", type);
+        snprintf(fn, 256, "%s/%s.%c", home, ".bqfft.wisdom", type);
 
         FILE *f = fopen(fn, save ? "wb" : "rb");
         if (!f) return;
@@ -1766,11 +1029,15 @@ public:
         }
 
         fclose(f);
+#else
+        (void)save;
+        (void)type;
+#endif
     }
 
-    void packFloat(const float *R__ re, const float *R__ im) {
+    void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) {
         const int hs = m_size/2;
-        fftwf_complex *const R__ fpacked = m_fpacked; 
+        fftwf_complex *const BQ_R__ fpacked = m_fpacked; 
         for (int i = 0; i <= hs; ++i) {
             fpacked[i][0] = re[i];
         }
@@ -1785,9 +1052,9 @@ public:
         }                
     }
 
-    void packDouble(const double *R__ re, const double *R__ im) {
+    void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) {
         const int hs = m_size/2;
-        fftw_complex *const R__ dpacked = m_dpacked; 
+        fftw_complex *const BQ_R__ dpacked = m_dpacked; 
         for (int i = 0; i <= hs; ++i) {
             dpacked[i][0] = re[i];
         }
@@ -1802,7 +1069,7 @@ public:
         }
     }
 
-    void unpackFloat(float *R__ re, float *R__ im) {
+    void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) {
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
             re[i] = m_fpacked[i][0];
@@ -1814,7 +1081,7 @@ public:
         }
     }        
 
-    void unpackDouble(double *R__ re, double *R__ im) {
+    void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) {
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
             re[i] = m_dpacked[i][0];
@@ -1826,10 +1093,10 @@ public:
         }
     }        
 
-    void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) {
+    void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) {
         if (!m_dplanf) initDouble();
         const int sz = m_size;
-        fft_double_type *const R__ dbuf = m_dbuf;
+        fft_double_type *const BQ_R__ dbuf = m_dbuf;
 #ifndef FFTW_SINGLE_ONLY
         if (realIn != dbuf) 
 #endif
@@ -1840,10 +1107,10 @@ public:
         unpackDouble(realOut, imagOut);
     }
 
-    void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) {
+    void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) {
         if (!m_dplanf) initDouble();
         const int sz = m_size;
-        fft_double_type *const R__ dbuf = m_dbuf;
+        fft_double_type *const BQ_R__ dbuf = m_dbuf;
 #ifndef FFTW_SINGLE_ONLY
         if (realIn != dbuf) 
 #endif
@@ -1854,9 +1121,9 @@ public:
         v_convert(complexOut, (const fft_double_type *)m_dpacked, sz + 2);
     }
 
-    void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) {
+    void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
         if (!m_dplanf) initDouble();
-        fft_double_type *const R__ dbuf = m_dbuf;
+        fft_double_type *const BQ_R__ dbuf = m_dbuf;
         const int sz = m_size;
 #ifndef FFTW_SINGLE_ONLY
         if (realIn != dbuf)
@@ -1869,9 +1136,9 @@ public:
             (magOut, phaseOut, (const fft_double_type *)m_dpacked, m_size/2+1);
     }
 
-    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) {
+    void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) {
         if (!m_dplanf) initDouble();
-        fft_double_type *const R__ dbuf = m_dbuf;
+        fft_double_type *const BQ_R__ dbuf = m_dbuf;
         const int sz = m_size;
 #ifndef FFTW_SINGLE_ONLY
         if (realIn != m_dbuf)
@@ -1884,9 +1151,9 @@ public:
             (magOut, (const fft_double_type *)m_dpacked, m_size/2+1);
     }
 
-    void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) {
+    void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) {
         if (!m_fplanf) initFloat();
-        fft_float_type *const R__ fbuf = m_fbuf;
+        fft_float_type *const BQ_R__ fbuf = m_fbuf;
         const int sz = m_size;
 #ifndef FFTW_DOUBLE_ONLY
         if (realIn != fbuf)
@@ -1898,9 +1165,9 @@ public:
         unpackFloat(realOut, imagOut);
     }
 
-    void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) {
+    void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) {
         if (!m_fplanf) initFloat();
-        fft_float_type *const R__ fbuf = m_fbuf;
+        fft_float_type *const BQ_R__ fbuf = m_fbuf;
         const int sz = m_size;
 #ifndef FFTW_DOUBLE_ONLY
         if (realIn != fbuf)
@@ -1912,9 +1179,9 @@ public:
         v_convert(complexOut, (const fft_float_type *)m_fpacked, sz + 2);
     }
 
-    void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) {
+    void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
         if (!m_fplanf) initFloat();
-        fft_float_type *const R__ fbuf = m_fbuf;
+        fft_float_type *const BQ_R__ fbuf = m_fbuf;
         const int sz = m_size;
 #ifndef FFTW_DOUBLE_ONLY
         if (realIn != fbuf) 
@@ -1924,12 +1191,12 @@ public:
             }
         fftwf_execute(m_fplanf);
         v_cartesian_interleaved_to_polar
-            (magOut, phaseOut, (fft_float_type *)m_fpacked, m_size/2+1);
+            (magOut, phaseOut, (const fft_float_type *)m_fpacked, m_size/2+1);
     }
 
-    void forwardMagnitude(const float *R__ realIn, float *R__ magOut) {
+    void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) {
         if (!m_fplanf) initFloat();
-        fft_float_type *const R__ fbuf = m_fbuf;
+        fft_float_type *const BQ_R__ fbuf = m_fbuf;
         const int sz = m_size;
 #ifndef FFTW_DOUBLE_ONLY
         if (realIn != fbuf)
@@ -1942,12 +1209,12 @@ public:
             (magOut, (const fft_float_type *)m_fpacked, m_size/2+1);
     }
 
-    void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) {
+    void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) {
         if (!m_dplanf) initDouble();
         packDouble(realIn, imagIn);
         fftw_execute(m_dplani);
         const int sz = m_size;
-        fft_double_type *const R__ dbuf = m_dbuf;
+        fft_double_type *const BQ_R__ dbuf = m_dbuf;
 #ifndef FFTW_SINGLE_ONLY
         if (realOut != dbuf) 
 #endif
@@ -1956,12 +1223,12 @@ public:
             }
     }
 
-    void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) {
+    void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) {
         if (!m_dplanf) initDouble();
         v_convert((fft_double_type *)m_dpacked, complexIn, m_size + 2);
         fftw_execute(m_dplani);
         const int sz = m_size;
-        fft_double_type *const R__ dbuf = m_dbuf;
+        fft_double_type *const BQ_R__ dbuf = m_dbuf;
 #ifndef FFTW_SINGLE_ONLY
         if (realOut != dbuf) 
 #endif
@@ -1970,13 +1237,13 @@ public:
             }
     }
 
-    void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) {
+    void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) {
         if (!m_dplanf) initDouble();
         v_polar_to_cartesian_interleaved
             ((fft_double_type *)m_dpacked, magIn, phaseIn, m_size/2+1);
         fftw_execute(m_dplani);
         const int sz = m_size;
-        fft_double_type *const R__ dbuf = m_dbuf;
+        fft_double_type *const BQ_R__ dbuf = m_dbuf;
 #ifndef FFTW_SINGLE_ONLY
         if (realOut != dbuf)
 #endif
@@ -1985,10 +1252,10 @@ public:
             }
     }
 
-    void inverseCepstral(const double *R__ magIn, double *R__ cepOut) {
+    void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) {
         if (!m_dplanf) initDouble();
-        fft_double_type *const R__ dbuf = m_dbuf;
-        fftw_complex *const R__ dpacked = m_dpacked;
+        fft_double_type *const BQ_R__ dbuf = m_dbuf;
+        fftw_complex *const BQ_R__ dpacked = m_dpacked;
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
             dpacked[i][0] = log(magIn[i] + 0.000001);
@@ -2006,12 +1273,12 @@ public:
             }
     }
 
-    void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) {
+    void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) {
         if (!m_fplanf) initFloat();
         packFloat(realIn, imagIn);
         fftwf_execute(m_fplani);
         const int sz = m_size;
-        fft_float_type *const R__ fbuf = m_fbuf;
+        fft_float_type *const BQ_R__ fbuf = m_fbuf;
 #ifndef FFTW_DOUBLE_ONLY
         if (realOut != fbuf)
 #endif
@@ -2020,12 +1287,12 @@ public:
             }
     }
 
-    void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) {
+    void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) {
         if (!m_fplanf) initFloat();
         v_convert((fft_float_type *)m_fpacked, complexIn, m_size + 2);
         fftwf_execute(m_fplani);
         const int sz = m_size;
-        fft_float_type *const R__ fbuf = m_fbuf;
+        fft_float_type *const BQ_R__ fbuf = m_fbuf;
 #ifndef FFTW_DOUBLE_ONLY
         if (realOut != fbuf)
 #endif
@@ -2034,13 +1301,13 @@ public:
             }
     }
 
-    void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) {
+    void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) {
         if (!m_fplanf) initFloat();
         v_polar_to_cartesian_interleaved
             ((fft_float_type *)m_fpacked, magIn, phaseIn, m_size/2+1);
         fftwf_execute(m_fplani);
         const int sz = m_size;
-        fft_float_type *const R__ fbuf = m_fbuf;
+        fft_float_type *const BQ_R__ fbuf = m_fbuf;
 #ifndef FFTW_DOUBLE_ONLY
         if (realOut != fbuf)
 #endif
@@ -2049,10 +1316,10 @@ public:
             }
     }
 
-    void inverseCepstral(const float *R__ magIn, float *R__ cepOut) {
+    void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) {
         if (!m_fplanf) initFloat();
         const int hs = m_size/2;
-        fftwf_complex *const R__ fpacked = m_fpacked;
+        fftwf_complex *const BQ_R__ fpacked = m_fpacked;
         for (int i = 0; i <= hs; ++i) {
             fpacked[i][0] = logf(magIn[i] + 0.000001f);
         }
@@ -2061,7 +1328,7 @@ public:
         }
         fftwf_execute(m_fplani);
         const int sz = m_size;
-        fft_float_type *const R__ fbuf = m_fbuf;
+        fft_float_type *const BQ_R__ fbuf = m_fbuf;
 #ifndef FFTW_DOUBLE_ONLY
         if (cepOut != fbuf)
 #endif
@@ -2090,8 +1357,20 @@ private:
     const int m_size;
     static int m_extantf;
     static int m_extantd;
-#ifndef NO_THREADING
-    static Mutex m_commonMutex;
+#ifdef NO_THREADING
+    void lock() {}
+    void unlock() {}
+#else
+#ifdef _WIN32
+    static HANDLE m_commonMutex;
+    void lock() { WaitForSingleObject(m_commonMutex, INFINITE); }
+    void unlock() { ReleaseMutex(m_commonMutex); }
+#else
+    static pthread_mutex_t m_commonMutex;
+    static bool m_haveMutex;
+    void lock() { pthread_mutex_lock(&m_commonMutex); }
+    void unlock() { pthread_mutex_unlock(&m_commonMutex); }
+#endif
 #endif
 };
 
@@ -2102,356 +1381,49 @@ int
 D_FFTW::m_extantd = 0;
 
 #ifndef NO_THREADING
-Mutex
-D_FFTW::m_commonMutex;
+#ifdef _WIN32
+HANDLE D_FFTW::m_commonMutex = CreateMutex(NULL, FALSE, NULL);
+#else
+pthread_mutex_t D_FFTW::m_commonMutex = PTHREAD_MUTEX_INITIALIZER;
 #endif
+#endif
+
+#undef fft_float_type
+#undef fft_double_type
+
+#ifdef FFTW_DOUBLE_ONLY
+#undef fftwf_complex
+#undef fftwf_plan 
+#undef fftwf_plan_dft_r2c_1d
+#undef fftwf_plan_dft_c2r_1d
+#undef fftwf_destroy_plan 
+#undef fftwf_malloc 
+#undef fftwf_free 
+#undef fftwf_execute
+#undef atan2f 
+#undef sqrtf 
+#undef cosf 
+#undef sinf
+#endif /* FFTW_DOUBLE_ONLY */
+
+#ifdef FFTW_SINGLE_ONLY
+#undef fftw_complex
+#undef fftw_plan
+#undef fftw_plan_dft_r2c_1d
+#undef fftw_plan_dft_c2r_1d 
+#undef fftw_destroy_plan
+#undef fftw_malloc
+#undef fftw_free
+#undef fftw_execute
+#undef atan2
+#undef sqrt
+#undef cos
+#undef sin
+#endif /* FFTW_SINGLE_ONLY */
 
 #endif /* HAVE_FFTW3 */
 
-#ifdef HAVE_SFFT
-
-/*
- Define SFFT_DOUBLE_ONLY to make all uses of SFFT functions be
- double-precision (so "float" FFTs are calculated by casting to
- doubles and using the double-precision SFFT function).
-
- Define SFFT_SINGLE_ONLY to make all uses of SFFT functions be
- single-precision (so "double" FFTs are calculated by casting to
- floats and using the single-precision SFFT function).
-
- Neither of these flags is desirable for either performance or
- precision.
-*/
-
-//#define SFFT_DOUBLE_ONLY 1
-//#define SFFT_SINGLE_ONLY 1
-
-#if defined(SFFT_DOUBLE_ONLY) && defined(SFFT_SINGLE_ONLY)
-// Can't meaningfully define both
-#error Can only define one of SFFT_DOUBLE_ONLY and SFFT_SINGLE_ONLY
-#endif
-
-#ifdef SFFT_DOUBLE_ONLY
-#define fft_float_type double
-#define FLAG_SFFT_FLOAT SFFT_DOUBLE
-#define atan2f atan2
-#define sqrtf sqrt
-#define cosf cos
-#define sinf sin
-#define logf log
-#else
-#define FLAG_SFFT_FLOAT SFFT_FLOAT
-#define fft_float_type float
-#endif /* SFFT_DOUBLE_ONLY */
-
-#ifdef SFFT_SINGLE_ONLY
-#define fft_double_type float
-#define FLAG_SFFT_DOUBLE SFFT_FLOAT
-#define atan2 atan2f
-#define sqrt sqrtf
-#define cos cosf
-#define sin sinf
-#define log logf
-#else
-#define FLAG_SFFT_DOUBLE SFFT_DOUBLE
-#define fft_double_type double
-#endif /* SFFT_SINGLE_ONLY */
-
-class D_SFFT : public FFTImpl
-{
-public:
-    D_SFFT(int size) :
-        m_fplanf(0), m_fplani(0), m_dplanf(0), m_dplani(0), m_size(size)
-    {
-    }
-
-    ~D_SFFT() {
-        if (m_fplanf) {
-            sfft_free(m_fplanf);
-            sfft_free(m_fplani);
-            deallocate(m_fbuf);
-            deallocate(m_fresult);
-        }
-        if (m_dplanf) {
-            sfft_free(m_dplanf);
-            sfft_free(m_dplani);
-            deallocate(m_dbuf);
-            deallocate(m_dresult);
-        }
-    }
-
-    FFT::Precisions
-    getSupportedPrecisions() const {
-#ifdef SFFT_SINGLE_ONLY
-        return FFT::SinglePrecision;
-#else
-#ifdef SFFT_DOUBLE_ONLY
-        return FFT::DoublePrecision;
-#else
-        return FFT::SinglePrecision | FFT::DoublePrecision;
-#endif
-#endif
-    }
-
-    void initFloat() {
-        if (m_fplanf) return;
-        m_fbuf = allocate<fft_float_type>(2 * m_size);
-        m_fresult = allocate<fft_float_type>(2 * m_size);
-        m_fplanf = sfft_init(m_size, SFFT_FORWARD | FLAG_SFFT_FLOAT);
-        m_fplani = sfft_init(m_size, SFFT_BACKWARD | FLAG_SFFT_FLOAT);
-        if (!m_fplanf || !m_fplani) {
-            if (!m_fplanf) {
-                std::cerr << "D_SFFT: Failed to construct forward float transform for size " << m_size << " (check SFFT library's target configuration)" << std::endl;
-            } else {
-                std::cerr << "D_SFFT: Failed to construct inverse float transform for size " << m_size << " (check SFFT library's target configuration)" << std::endl;
-            }
-#ifndef NO_EXCEPTIONS
-            throw FFT::InternalError;
-#else
-            abort();
-#endif
-        }
-    }
-
-    void initDouble() {
-        if (m_dplanf) return;
-        m_dbuf = allocate<fft_double_type>(2 * m_size);
-        m_dresult = allocate<fft_double_type>(2 * m_size);
-        m_dplanf = sfft_init(m_size, SFFT_FORWARD | FLAG_SFFT_DOUBLE);
-        m_dplani = sfft_init(m_size, SFFT_BACKWARD | FLAG_SFFT_DOUBLE);
-        if (!m_dplanf || !m_dplani) {
-            if (!m_dplanf) {
-                std::cerr << "D_SFFT: Failed to construct forward double transform for size " << m_size << " (check SFFT library's target configuration)" << std::endl;
-            } else {
-                std::cerr << "D_SFFT: Failed to construct inverse double transform for size " << m_size << " (check SFFT library's target configuration)" << std::endl;
-            }
-#ifndef NO_EXCEPTIONS
-            throw FFT::InternalError;
-#else
-            abort();
-#endif
-        }
-    }
-
-    void packFloat(const float *R__ re, const float *R__ im, fft_float_type *target, int n) {
-        for (int i = 0; i < n; ++i) target[i*2] = re[i];
-        if (im) {
-            for (int i = 0; i < n; ++i) target[i*2+1] = im[i]; 
-        } else {
-            for (int i = 0; i < n; ++i) target[i*2+1] = 0.f;
-        }                
-    }
-
-    void packDouble(const double *R__ re, const double *R__ im, fft_double_type *target, int n) {
-        for (int i = 0; i < n; ++i) target[i*2] = re[i];
-        if (im) {
-            for (int i = 0; i < n; ++i) target[i*2+1] = im[i];
-        } else {
-            for (int i = 0; i < n; ++i) target[i*2+1] = 0.0;
-        }                
-    }
-
-    void unpackFloat(const fft_float_type *source, float *R__ re, float *R__ im, int n) {
-        for (int i = 0; i < n; ++i) re[i] = source[i*2];
-        if (im) {
-            for (int i = 0; i < n; ++i) im[i] = source[i*2+1];
-        }
-    }        
-
-    void unpackDouble(const fft_double_type *source, double *R__ re, double *R__ im, int n) {
-        for (int i = 0; i < n; ++i) re[i] = source[i*2];
-        if (im) {
-            for (int i = 0; i < n; ++i) im[i] = source[i*2+1];
-        }
-    }        
-
-    template<typename T>
-    void mirror(T *R__ cplx, int n) {
-        for (int i = 1; i <= n/2; ++i) {
-            int j = n-i;
-            cplx[j*2] = cplx[i*2];
-            cplx[j*2+1] = -cplx[i*2+1];
-        }
-    }
-
-    void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) {
-        if (!m_dplanf) initDouble();
-        packDouble(realIn, 0, m_dbuf, m_size);
-        sfft_execute(m_dplanf, m_dbuf, m_dresult);
-        unpackDouble(m_dresult, realOut, imagOut, m_size/2+1);
-    }
-
-    void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) {
-        if (!m_dplanf) initDouble();
-        packDouble(realIn, 0, m_dbuf, m_size);
-        sfft_execute(m_dplanf, m_dbuf, m_dresult);
-        v_convert(complexOut, m_dresult, m_size+2); // i.e. m_size/2+1 complex
-    }
-
-    void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) {
-        if (!m_dplanf) initDouble();
-        packDouble(realIn, 0, m_dbuf, m_size);
-        sfft_execute(m_dplanf, m_dbuf, m_dresult);
-        v_cartesian_interleaved_to_polar(magOut, phaseOut,
-                                         m_dresult, m_size/2+1);
-    }
-
-    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) {
-        if (!m_dplanf) initDouble();
-        packDouble(realIn, 0, m_dbuf, m_size);
-        sfft_execute(m_dplanf, m_dbuf, m_dresult);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrt(m_dresult[i*2] * m_dresult[i*2] +
-                             m_dresult[i*2+1] * m_dresult[i*2+1]);
-        }
-    }
-
-    void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) {
-        if (!m_fplanf) initFloat();
-        packFloat(realIn, 0, m_fbuf, m_size);
-        sfft_execute(m_fplanf, m_fbuf, m_fresult);
-        unpackFloat(m_fresult, realOut, imagOut, m_size/2+1);
-    }
-
-    void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) {
-        if (!m_fplanf) initFloat();
-        packFloat(realIn, 0, m_fbuf, m_size);
-        sfft_execute(m_fplanf, m_fbuf, m_fresult);
-        v_convert(complexOut, m_fresult, m_size+2); // i.e. m_size/2+1 complex
-    }
-
-    void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) {
-        if (!m_fplanf) initFloat();
-        packFloat(realIn, 0, m_fbuf, m_size);
-        sfft_execute(m_fplanf, m_fbuf, m_fresult);
-        v_cartesian_interleaved_to_polar(magOut, phaseOut,
-                                         m_fresult, m_size/2+1);
-    }
-
-    void forwardMagnitude(const float *R__ realIn, float *R__ magOut) {
-        if (!m_fplanf) initFloat();
-        packFloat(realIn, 0, m_fbuf, m_size);
-        sfft_execute(m_fplanf, m_fbuf, m_fresult);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrtf(m_fresult[i*2] * m_fresult[i*2] +
-                              m_fresult[i*2+1] * m_fresult[i*2+1]);
-        }
-    }
-
-    void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) {
-        if (!m_dplanf) initDouble();
-        packDouble(realIn, imagIn, m_dbuf, m_size/2+1);
-        mirror(m_dbuf, m_size);
-        sfft_execute(m_dplani, m_dbuf, m_dresult);
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_dresult[i*2];
-        }
-    }
-
-    void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) {
-        if (!m_dplanf) initDouble();
-        v_convert((double *)m_dbuf, complexIn, m_size + 2);
-        mirror(m_dbuf, m_size);
-        sfft_execute(m_dplani, m_dbuf, m_dresult);
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_dresult[i*2];
-        }
-    }
-
-    void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) {
-        if (!m_dplanf) initDouble();
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_dbuf[i*2] = magIn[i] * cos(phaseIn[i]);
-            m_dbuf[i*2+1] = magIn[i] * sin(phaseIn[i]);
-        }
-        mirror(m_dbuf, m_size);
-        sfft_execute(m_dplani, m_dbuf, m_dresult);
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_dresult[i*2];
-        }
-    }
-
-    void inverseCepstral(const double *R__ magIn, double *R__ cepOut) {
-        if (!m_dplanf) initDouble();
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_dbuf[i*2] = log(magIn[i] + 0.000001);
-            m_dbuf[i*2+1] = 0.0;
-        }
-        mirror(m_dbuf, m_size);
-        sfft_execute(m_dplani, m_dbuf, m_dresult);
-        for (int i = 0; i < m_size; ++i) {
-            cepOut[i] = m_dresult[i*2];
-        }
-    }
-
-    void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) {
-        if (!m_fplanf) initFloat();
-        packFloat(realIn, imagIn, m_fbuf, m_size/2+1);
-        mirror(m_fbuf, m_size);
-        sfft_execute(m_fplani, m_fbuf, m_fresult);
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_fresult[i*2];
-        }
-    }
-
-    void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) {
-        if (!m_fplanf) initFloat();
-        v_convert((float *)m_fbuf, complexIn, m_size + 2);
-        mirror(m_fbuf, m_size);
-        sfft_execute(m_fplani, m_fbuf, m_fresult);
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_fresult[i*2];
-        }
-    }
-
-    void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) {
-        if (!m_fplanf) initFloat();
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_fbuf[i*2] = magIn[i] * cosf(phaseIn[i]);
-            m_fbuf[i*2+1] = magIn[i] * sinf(phaseIn[i]);
-        }
-        mirror(m_fbuf, m_size);
-        sfft_execute(m_fplani, m_fbuf, m_fresult);
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_fresult[i*2];
-        }
-    }
-
-    void inverseCepstral(const float *R__ magIn, float *R__ cepOut) {
-        if (!m_fplanf) initFloat();
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            m_fbuf[i*2] = logf(magIn[i] + 0.00001);
-            m_fbuf[i*2+1] = 0.0f;
-        }
-        sfft_execute(m_fplani, m_fbuf, m_fresult);
-        for (int i = 0; i < m_size; ++i) {
-            cepOut[i] = m_fresult[i*2];
-        }
-    }
-
-private:
-    sfft_plan_t *m_fplanf;
-    sfft_plan_t *m_fplani;
-    fft_float_type *m_fbuf;
-    fft_float_type *m_fresult;
-
-    sfft_plan_t *m_dplanf;
-    sfft_plan_t *m_dplani;
-    fft_double_type *m_dbuf;
-    fft_double_type *m_dresult;
-
-    const int m_size;
-};
-
-#endif /* HAVE_SFFT */
-
-#ifdef USE_KISSFFT
+#ifdef HAVE_KISSFFT
 
 class D_KISSFFT : public FFTImpl
 {
@@ -2478,12 +1450,15 @@ public:
     ~D_KISSFFT() {
         kiss_fftr_free(m_fplanf);
         kiss_fftr_free(m_fplani);
-        kiss_fft_cleanup();
 
         delete[] m_fbuf;
         delete[] m_fpacked;
     }
 
+    int getSize() const {
+        return m_size;
+    }
+
     FFT::Precisions
     getSupportedPrecisions() const {
         return FFT::SinglePrecision;
@@ -2492,7 +1467,7 @@ public:
     void initFloat() { }
     void initDouble() { }
 
-    void packFloat(const float *R__ re, const float *R__ im) {
+    void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) {
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
             m_fpacked[i].r = re[i];
@@ -2508,7 +1483,7 @@ public:
         }
     }
 
-    void unpackFloat(float *R__ re, float *R__ im) {
+    void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) {
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
             re[i] = m_fpacked[i].r;
@@ -2520,7 +1495,7 @@ public:
         }
     }        
 
-    void packDouble(const double *R__ re, const double *R__ im) {
+    void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) {
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
             m_fpacked[i].r = float(re[i]);
@@ -2536,7 +1511,7 @@ public:
         }
     }
 
-    void unpackDouble(double *R__ re, double *R__ im) {
+    void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) {
         const int hs = m_size/2;
         for (int i = 0; i <= hs; ++i) {
             re[i] = double(m_fpacked[i].r);
@@ -2548,182 +1523,104 @@ public:
         }
     }        
 
-    void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) {
-
+    void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) {
         v_convert(m_fbuf, realIn, m_size);
         kiss_fftr(m_fplanf, m_fbuf, m_fpacked);
         unpackDouble(realOut, imagOut);
     }
 
-    void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) {
-
+    void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) {
         v_convert(m_fbuf, realIn, m_size);
         kiss_fftr(m_fplanf, m_fbuf, m_fpacked);
         v_convert(complexOut, (float *)m_fpacked, m_size + 2);
     }
 
-    void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) {
-
-        for (int i = 0; i < m_size; ++i) {
-            m_fbuf[i] = float(realIn[i]);
-        }
-
+    void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
+        v_convert(m_fbuf, realIn, m_size);
         kiss_fftr(m_fplanf, m_fbuf, m_fpacked);
-
-        const int hs = m_size/2;
-
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrt(double(m_fpacked[i].r) * double(m_fpacked[i].r) +
-                             double(m_fpacked[i].i) * double(m_fpacked[i].i));
-        }
-
-        for (int i = 0; i <= hs; ++i) {
-            phaseOut[i] = atan2(double(m_fpacked[i].i), double(m_fpacked[i].r));
-        }
+        v_cartesian_interleaved_to_polar
+            (magOut, phaseOut, (float *)m_fpacked, m_size/2+1);
     }
 
-    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) {
-
-        for (int i = 0; i < m_size; ++i) {
-            m_fbuf[i] = float(realIn[i]);
-        }
-
+    void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) {
+        v_convert(m_fbuf, realIn, m_size);
         kiss_fftr(m_fplanf, m_fbuf, m_fpacked);
-
-        const int hs = m_size/2;
-
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrt(double(m_fpacked[i].r) * double(m_fpacked[i].r) +
-                             double(m_fpacked[i].i) * double(m_fpacked[i].i));
-        }
+        v_cartesian_interleaved_to_magnitudes
+            (magOut, (float *)m_fpacked, m_size/2+1);
     }
 
-    void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) {
-
+    void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) {
         kiss_fftr(m_fplanf, realIn, m_fpacked);
         unpackFloat(realOut, imagOut);
     }
 
-    void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) {
-
+    void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) {
         kiss_fftr(m_fplanf, realIn, (kiss_fft_cpx *)complexOut);
     }
 
-    void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) {
-
+    void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
         kiss_fftr(m_fplanf, realIn, m_fpacked);
-
-        const int hs = m_size/2;
-
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrtf(m_fpacked[i].r * m_fpacked[i].r +
-                              m_fpacked[i].i * m_fpacked[i].i);
-        }
-
-        for (int i = 0; i <= hs; ++i) {
-            phaseOut[i] = atan2f(m_fpacked[i].i, m_fpacked[i].r);
-        }
+        v_cartesian_interleaved_to_polar
+            (magOut, phaseOut, (float *)m_fpacked, m_size/2+1);
     }
 
-    void forwardMagnitude(const float *R__ realIn, float *R__ magOut) {
-
+    void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) {
         kiss_fftr(m_fplanf, realIn, m_fpacked);
-
-        const int hs = m_size/2;
-
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrtf(m_fpacked[i].r * m_fpacked[i].r +
-                              m_fpacked[i].i * m_fpacked[i].i);
-        }
+        v_cartesian_interleaved_to_magnitudes
+            (magOut, (float *)m_fpacked, m_size/2+1);
     }
 
-    void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) {
-
+    void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) {
         packDouble(realIn, imagIn);
-
         kiss_fftri(m_fplani, m_fpacked, m_fbuf);
-
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_fbuf[i];
-        }
+        v_convert(realOut, m_fbuf, m_size);
     }
 
-    void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) {
-
+    void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) {
         v_convert((float *)m_fpacked, complexIn, m_size + 2);
-
         kiss_fftri(m_fplani, m_fpacked, m_fbuf);
-
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_fbuf[i];
-        }
+        v_convert(realOut, m_fbuf, m_size);
     }
 
-    void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) {
-
-        const int hs = m_size/2;
-
-        for (int i = 0; i <= hs; ++i) {
-            m_fpacked[i].r = float(magIn[i] * cos(phaseIn[i]));
-            m_fpacked[i].i = float(magIn[i] * sin(phaseIn[i]));
-        }
-
+    void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) {
+        v_polar_to_cartesian_interleaved
+            ((float *)m_fpacked, magIn, phaseIn, m_size/2+1);
         kiss_fftri(m_fplani, m_fpacked, m_fbuf);
-
-        for (int i = 0; i < m_size; ++i) {
-            realOut[i] = m_fbuf[i];
-        }
+        v_convert(realOut, m_fbuf, m_size);
     }
 
-    void inverseCepstral(const double *R__ magIn, double *R__ cepOut) {
-
+    void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) {
         const int hs = m_size/2;
-
         for (int i = 0; i <= hs; ++i) {
             m_fpacked[i].r = float(log(magIn[i] + 0.000001));
             m_fpacked[i].i = 0.0f;
         }
-
         kiss_fftri(m_fplani, m_fpacked, m_fbuf);
-
-        for (int i = 0; i < m_size; ++i) {
-            cepOut[i] = m_fbuf[i];
-        }
+        v_convert(cepOut, m_fbuf, m_size);
     }
     
-    void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) {
-
+    void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) {
         packFloat(realIn, imagIn);
         kiss_fftri(m_fplani, m_fpacked, realOut);
     }
 
-    void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) {
-
+    void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) {
         v_copy((float *)m_fpacked, complexIn, m_size + 2);
         kiss_fftri(m_fplani, m_fpacked, realOut);
     }
 
-    void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) {
-
-        const int hs = m_size/2;
-
-        for (int i = 0; i <= hs; ++i) {
-            m_fpacked[i].r = magIn[i] * cosf(phaseIn[i]);
-            m_fpacked[i].i = magIn[i] * sinf(phaseIn[i]);
-        }
-
+    void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) {
+        v_polar_to_cartesian_interleaved
+            ((float *)m_fpacked, magIn, phaseIn, m_size/2+1);
         kiss_fftri(m_fplani, m_fpacked, realOut);
     }
 
-    void inverseCepstral(const float *R__ magIn, float *R__ cepOut) {
-
+    void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) {
         const int hs = m_size/2;
-
         for (int i = 0; i <= hs; ++i) {
             m_fpacked[i].r = logf(magIn[i] + 0.000001f);
             m_fpacked[i].i = 0.0f;
         }
-
         kiss_fftri(m_fplani, m_fpacked, cepOut);
     }
 
@@ -2735,51 +1632,49 @@ private:
     kiss_fft_cpx *m_fpacked;
 };
 
-#endif /* USE_KISSFFT */
+#endif /* HAVE_KISSFFT */
 
 #ifdef USE_BUILTIN_FFT
 
-class D_Cross : public FFTImpl
+class D_Builtin : public FFTImpl
 {
 public:
-    D_Cross(int size) : m_size(size), m_table(0) {
-        
-        m_a = new double[size];
-        m_b = new double[size];
-        m_c = new double[size];
-        m_d = new double[size];
-
-        m_table = new int[m_size];
-    
-        int bits;
-        int i, j, k, m;
-
-        for (i = 0; ; ++i) {
-            if (m_size & (1 << i)) {
-                bits = i;
-                break;
-            }
-        }
-        
-        for (i = 0; i < m_size; ++i) {
-            
-            m = i;
-            
-            for (j = k = 0; j < bits; ++j) {
-                k = (k << 1) | (m & 1);
-                m >>= 1;
-            }
-            
-            m_table[i] = k;
-        }
+    D_Builtin(int size) :
+        m_size(size),
+        m_half(size/2),
+        m_blockTableSize(16),
+        m_maxTabledBlock(1 << m_blockTableSize)
+    {
+        m_table = allocate_and_zero<int>(m_half);
+        m_sincos = allocate_and_zero<double>(m_blockTableSize * 4);
+        m_sincos_r = allocate_and_zero<double>(m_half);
+        m_vr = allocate_and_zero<double>(m_half);
+        m_vi = allocate_and_zero<double>(m_half);
+        m_a = allocate_and_zero<double>(m_half + 1);
+        m_b = allocate_and_zero<double>(m_half + 1);
+        m_c = allocate_and_zero<double>(m_half + 1);
+        m_d = allocate_and_zero<double>(m_half + 1);
+        m_a_and_b[0] = m_a;
+        m_a_and_b[1] = m_b;
+        m_c_and_d[0] = m_c;
+        m_c_and_d[1] = m_d;
+        makeTables();
     }
 
-    ~D_Cross() {
-        delete[] m_table;
-        delete[] m_a;
-        delete[] m_b;
-        delete[] m_c;
-        delete[] m_d;
+    ~D_Builtin() {
+        deallocate(m_table);
+        deallocate(m_sincos);
+        deallocate(m_sincos_r);
+        deallocate(m_vr);
+        deallocate(m_vi);
+        deallocate(m_a);
+        deallocate(m_b);
+        deallocate(m_c);
+        deallocate(m_d);
+    }
+
+    int getSize() const {
+        return m_size;
     }
 
     FFT::Precisions
@@ -2790,382 +1685,696 @@ public:
     void initFloat() { }
     void initDouble() { }
 
-    void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut) {
-        basefft(false, realIn, 0, m_c, m_d);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) realOut[i] = m_c[i];
-        if (imagOut) {
-            for (int i = 0; i <= hs; ++i) imagOut[i] = m_d[i];
-        }
+    void forward(const double *BQ_R__ realIn,
+                 double *BQ_R__ realOut, double *BQ_R__ imagOut) {
+        transformF(realIn, realOut, imagOut);
     }
 
-    void forwardInterleaved(const double *R__ realIn, double *R__ complexOut) {
-        basefft(false, realIn, 0, m_c, m_d);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) complexOut[i*2] = m_c[i];
-        for (int i = 0; i <= hs; ++i) complexOut[i*2+1] = m_d[i];
+    void forwardInterleaved(const double *BQ_R__ realIn,
+                            double *BQ_R__ complexOut) {
+        transformF(realIn, m_c, m_d);
+        v_interleave(complexOut, m_c_and_d, 2, m_half + 1);
     }
 
-    void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut) {
-        basefft(false, realIn, 0, m_c, m_d);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrt(m_c[i] * m_c[i] + m_d[i] * m_d[i]);
-            phaseOut[i] = atan2(m_d[i], m_c[i]) ;
-        }
+    void forwardPolar(const double *BQ_R__ realIn,
+                      double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
+        transformF(realIn, m_c, m_d);
+        v_cartesian_to_polar(magOut, phaseOut, m_c, m_d, m_half + 1);
     }
 
-    void forwardMagnitude(const double *R__ realIn, double *R__ magOut) {
-        basefft(false, realIn, 0, m_c, m_d);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrt(m_c[i] * m_c[i] + m_d[i] * m_d[i]);
-        }
+    void forwardMagnitude(const double *BQ_R__ realIn,
+                          double *BQ_R__ magOut) {
+        transformF(realIn, m_c, m_d);
+        v_cartesian_to_magnitudes(magOut, m_c, m_d, m_half + 1);
     }
 
-    void forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut) {
-        for (int i = 0; i < m_size; ++i) m_a[i] = realIn[i];
-        basefft(false, m_a, 0, m_c, m_d);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) realOut[i] = m_c[i];
-        if (imagOut) {
-            for (int i = 0; i <= hs; ++i) imagOut[i] = m_d[i];
-        }
+    void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut,
+                 float *BQ_R__ imagOut) {
+        transformF(realIn, m_c, m_d);
+        v_convert(realOut, m_c, m_half + 1);
+        v_convert(imagOut, m_d, m_half + 1);
     }
 
-    void forwardInterleaved(const float *R__ realIn, float *R__ complexOut) {
-        for (int i = 0; i < m_size; ++i) m_a[i] = realIn[i];
-        basefft(false, m_a, 0, m_c, m_d);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) complexOut[i*2] = m_c[i];
-        for (int i = 0; i <= hs; ++i) complexOut[i*2+1] = m_d[i];
+    void forwardInterleaved(const float *BQ_R__ realIn,
+                            float *BQ_R__ complexOut) {
+        transformF(realIn, m_c, m_d);
+        for (int i = 0; i <= m_half; ++i) complexOut[i*2] = m_c[i];
+        for (int i = 0; i <= m_half; ++i) complexOut[i*2+1] = m_d[i];
     }
 
-    void forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut) {
-        for (int i = 0; i < m_size; ++i) m_a[i] = realIn[i];
-        basefft(false, m_a, 0, m_c, m_d);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrt(m_c[i] * m_c[i] + m_d[i] * m_d[i]);
-            phaseOut[i] = atan2(m_d[i], m_c[i]) ;
-        }
+    void forwardPolar(const float *BQ_R__ realIn,
+                      float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
+        transformF(realIn, m_c, m_d);
+        v_cartesian_to_polar(magOut, phaseOut, m_c, m_d, m_half + 1);
     }
 
-    void forwardMagnitude(const float *R__ realIn, float *R__ magOut) {
-        for (int i = 0; i < m_size; ++i) m_a[i] = realIn[i];
-        basefft(false, m_a, 0, m_c, m_d);
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            magOut[i] = sqrt(m_c[i] * m_c[i] + m_d[i] * m_d[i]);
-        }
+    void forwardMagnitude(const float *BQ_R__ realIn,
+                          float *BQ_R__ magOut) {
+        transformF(realIn, m_c, m_d);
+        v_cartesian_to_magnitudes(magOut, m_c, m_d, m_half + 1);
     }
 
-    void inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut) {
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            double real = realIn[i];
-            double imag = imagIn[i];
-            m_a[i] = real;
-            m_b[i] = imag;
-            if (i > 0) {
-                m_a[m_size-i] = real;
-                m_b[m_size-i] = -imag;
-            }
-        }
-        basefft(true, m_a, m_b, realOut, m_d);
+    void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn,
+                 double *BQ_R__ realOut) {
+        transformI(realIn, imagIn, realOut);
     }
 
-    void inverseInterleaved(const double *R__ complexIn, double *R__ realOut) {
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            double real = complexIn[i*2];
-            double imag = complexIn[i*2+1];
-            m_a[i] = real;
-            m_b[i] = imag;
-            if (i > 0) {
-                m_a[m_size-i] = real;
-                m_b[m_size-i] = -imag;
-            }
-        }
-        basefft(true, m_a, m_b, realOut, m_d);
+    void inverseInterleaved(const double *BQ_R__ complexIn,
+                            double *BQ_R__ realOut) {
+        v_deinterleave(m_a_and_b, complexIn, 2, m_half + 1);
+        transformI(m_a, m_b, realOut);
     }
 
-    void inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut) {
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            double real = magIn[i] * cos(phaseIn[i]);
-            double imag = magIn[i] * sin(phaseIn[i]);
-            m_a[i] = real;
-            m_b[i] = imag;
-            if (i > 0) {
-                m_a[m_size-i] = real;
-                m_b[m_size-i] = -imag;
-            }
-        }
-        basefft(true, m_a, m_b, realOut, m_d);
+    void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn,
+                      double *BQ_R__ realOut) {
+        v_polar_to_cartesian(m_a, m_b, magIn, phaseIn, m_half + 1);
+        transformI(m_a, m_b, realOut);
     }
 
-    void inverseCepstral(const double *R__ magIn, double *R__ cepOut) {
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
+    void inverseCepstral(const double *BQ_R__ magIn,
+                         double *BQ_R__ cepOut) {
+        for (int i = 0; i <= m_half; ++i) {
             double real = log(magIn[i] + 0.000001);
             m_a[i] = real;
             m_b[i] = 0.0;
-            if (i > 0) {
-                m_a[m_size-i] = real;
-                m_b[m_size-i] = 0.0;
-            }
         }
-        basefft(true, m_a, m_b, cepOut, m_d);
+        transformI(m_a, m_b, cepOut);
     }
 
-    void inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut) {
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            float real = realIn[i];
-            float imag = imagIn[i];
-            m_a[i] = real;
-            m_b[i] = imag;
-            if (i > 0) {
-                m_a[m_size-i] = real;
-                m_b[m_size-i] = -imag;
-            }
-        }
-        basefft(true, m_a, m_b, m_c, m_d);
-        for (int i = 0; i < m_size; ++i) realOut[i] = m_c[i];
+    void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn,
+                 float *BQ_R__ realOut) {
+        v_convert(m_a, realIn, m_half + 1);
+        v_convert(m_b, imagIn, m_half + 1);
+        transformI(m_a, m_b, realOut);
     }
 
-    void inverseInterleaved(const float *R__ complexIn, float *R__ realOut) {
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            float real = complexIn[i*2];
-            float imag = complexIn[i*2+1];
-            m_a[i] = real;
-            m_b[i] = imag;
-            if (i > 0) {
-                m_a[m_size-i] = real;
-                m_b[m_size-i] = -imag;
-            }
-        }
-        basefft(true, m_a, m_b, m_c, m_d);
-        for (int i = 0; i < m_size; ++i) realOut[i] = m_c[i];
+    void inverseInterleaved(const float *BQ_R__ complexIn,
+                            float *BQ_R__ realOut) {
+        for (int i = 0; i <= m_half; ++i) m_a[i] = complexIn[i*2];
+        for (int i = 0; i <= m_half; ++i) m_b[i] = complexIn[i*2+1];
+        transformI(m_a, m_b, realOut);
     }
 
-    void inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut) {
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
-            float real = magIn[i] * cosf(phaseIn[i]);
-            float imag = magIn[i] * sinf(phaseIn[i]);
-            m_a[i] = real;
-            m_b[i] = imag;
-            if (i > 0) {
-                m_a[m_size-i] = real;
-                m_b[m_size-i] = -imag;
-            }
-        }
-        basefft(true, m_a, m_b, m_c, m_d);
-        for (int i = 0; i < m_size; ++i) realOut[i] = m_c[i];
+    void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn,
+                      float *BQ_R__ realOut) {
+        v_polar_to_cartesian(m_a, m_b, magIn, phaseIn, m_half + 1);
+        transformI(m_a, m_b, realOut);
     }
 
-    void inverseCepstral(const float *R__ magIn, float *R__ cepOut) {
-        const int hs = m_size/2;
-        for (int i = 0; i <= hs; ++i) {
+    void inverseCepstral(const float *BQ_R__ magIn,
+                         float *BQ_R__ cepOut) {
+        for (int i = 0; i <= m_half; ++i) {
             float real = logf(magIn[i] + 0.000001);
             m_a[i] = real;
             m_b[i] = 0.0;
-            if (i > 0) {
-                m_a[m_size-i] = real;
-                m_b[m_size-i] = 0.0;
-            }
         }
-        basefft(true, m_a, m_b, m_c, m_d);
-        for (int i = 0; i < m_size; ++i) cepOut[i] = m_c[i];
+        transformI(m_a, m_b, cepOut);
     }
 
 private:
     const int m_size;
+    const int m_half;
+    const int m_blockTableSize;
+    const int m_maxTabledBlock;
     int *m_table;
+    double *m_sincos;
+    double *m_sincos_r;
+    double *m_vr;
+    double *m_vi;
     double *m_a;
     double *m_b;
     double *m_c;
     double *m_d;
-    void basefft(bool inverse, const double *R__ ri, const double *R__ ii, double *R__ ro, double *R__ io);
+    double *m_a_and_b[2];
+    double *m_c_and_d[2];
+
+    void makeTables() {
+
+        // main table for complex fft - this is of size m_half,
+        // because we are at heart a real-complex fft only
+        
+        int bits;
+        int i, j, k, m;
+
+        int n = m_half;
+        
+        for (i = 0; ; ++i) {
+            if (n & (1 << i)) {
+                bits = i;
+                break;
+            }
+        }
+        
+        for (i = 0; i < n; ++i) {
+            m = i;
+            for (j = k = 0; j < bits; ++j) {
+                k = (k << 1) | (m & 1);
+                m >>= 1;
+            }
+            m_table[i] = k;
+        }
+
+        // sin and cos tables for complex fft
+        int ix = 0;
+        for (i = 2; i <= m_maxTabledBlock; i <<= 1) {
+            double phase = 2.0 * M_PI / double(i);
+            m_sincos[ix++] = sin(phase);
+            m_sincos[ix++] = sin(2.0 * phase);
+            m_sincos[ix++] = cos(phase);
+            m_sincos[ix++] = cos(2.0 * phase);
+        }
+        
+        // sin and cos tables for real-complex transform
+        ix = 0;
+        for (i = 0; i < n/2; ++i) {
+            double phase = M_PI * (double(i + 1) / double(m_half) + 0.5);
+            m_sincos_r[ix++] = sin(phase);
+            m_sincos_r[ix++] = cos(phase);
+        }
+    }        
+
+    // Uses m_a and m_b internally; does not touch m_c or m_d
+    template <typename T>
+    void transformF(const T *BQ_R__ ri,
+                    double *BQ_R__ ro, double *BQ_R__ io) {
+
+        int halfhalf = m_half / 2;
+        for (int i = 0; i < m_half; ++i) {
+            m_a[i] = ri[i * 2];
+            m_b[i] = ri[i * 2 + 1];
+        }
+        transformComplex(m_a, m_b, m_vr, m_vi, false);
+        ro[0] = m_vr[0] + m_vi[0];
+        ro[m_half] = m_vr[0] - m_vi[0];
+        io[0] = io[m_half] = 0.0;
+        int ix = 0;
+        for (int i = 0; i < halfhalf; ++i) {
+            double s = -m_sincos_r[ix++];
+            double c =  m_sincos_r[ix++];
+            int k = i + 1;
+            double r0 = m_vr[k];
+            double i0 = m_vi[k];
+            double r1 = m_vr[m_half - k];
+            double i1 = -m_vi[m_half - k];
+            double tw_r = (r0 - r1) * c - (i0 - i1) * s;
+            double tw_i = (r0 - r1) * s + (i0 - i1) * c;
+            ro[k] = (r0 + r1 + tw_r) * 0.5;
+            ro[m_half - k] = (r0 + r1 - tw_r) * 0.5;
+            io[k] = (i0 + i1 + tw_i) * 0.5;
+            io[m_half - k] = (tw_i - i0 - i1) * 0.5;
+        }
+    }
+
+    // Uses m_c and m_d internally; does not touch m_a or m_b
+    template <typename T>
+    void transformI(const double *BQ_R__ ri, const double *BQ_R__ ii,
+                    T *BQ_R__ ro) {
+        
+        int halfhalf = m_half / 2;
+        m_vr[0] = ri[0] + ri[m_half];
+        m_vi[0] = ri[0] - ri[m_half];
+        int ix = 0;
+        for (int i = 0; i < halfhalf; ++i) {
+            double s = m_sincos_r[ix++];
+            double c = m_sincos_r[ix++];
+            int k = i + 1;
+            double r0 = ri[k];
+            double r1 = ri[m_half - k];
+            double i0 = ii[k];
+            double i1 = -ii[m_half - k];
+            double tw_r = (r0 - r1) * c - (i0 - i1) * s;
+            double tw_i = (r0 - r1) * s + (i0 - i1) * c;
+            m_vr[k] = (r0 + r1 + tw_r);
+            m_vr[m_half - k] = (r0 + r1 - tw_r);
+            m_vi[k] = (i0 + i1 + tw_i);
+            m_vi[m_half - k] = (tw_i - i0 - i1);
+        }
+        transformComplex(m_vr, m_vi, m_c, m_d, true);
+        for (int i = 0; i < m_half; ++i) {
+            ro[i*2] = m_c[i];
+            ro[i*2+1] = m_d[i];
+        }
+    }
+    
+    void transformComplex(const double *BQ_R__ ri, const double *BQ_R__ ii,
+                          double *BQ_R__ ro, double *BQ_R__ io,
+                          bool inverse) {
+
+        // Following Don Cross's 1998 implementation, described by its
+        // author as public domain.
+        
+        // Because we are at heart a real-complex fft only, and we know that:
+        const int n = m_half;
+
+        for (int i = 0; i < n; ++i) {
+            int j = m_table[i];
+            ro[j] = ri[i];
+            io[j] = ii[i];
+        }
+        
+        int ix = 0;
+        int blockEnd = 1;
+        double ifactor = (inverse ? -1.0 : 1.0);
+        
+        for (int blockSize = 2; blockSize <= n; blockSize <<= 1) {
+
+            double sm1, sm2, cm1, cm2;
+
+            if (blockSize <= m_maxTabledBlock) {
+                sm1 = ifactor * m_sincos[ix++];
+                sm2 = ifactor * m_sincos[ix++];
+                cm1 = m_sincos[ix++];
+                cm2 = m_sincos[ix++];
+            } else {
+                double phase = 2.0 * M_PI / double(blockSize);
+                sm1 = ifactor * sin(phase);
+                sm2 = ifactor * sin(2.0 * phase);
+                cm1 = cos(phase);
+                cm2 = cos(2.0 * phase);
+            }
+            
+            double w = 2 * cm1;
+            double ar[3], ai[3];
+            
+            for (int i = 0; i < n; i += blockSize) {
+                
+                ar[2] = cm2;
+                ar[1] = cm1;
+                
+                ai[2] = sm2;
+                ai[1] = sm1;
+
+                int j = i;
+                
+                for (int m = 0; m < blockEnd; ++m) {
+                    
+                    ar[0] = w * ar[1] - ar[2];
+                    ar[2] = ar[1];
+                    ar[1] = ar[0];
+
+                    ai[0] = w * ai[1] - ai[2];
+                    ai[2] = ai[1];
+                    ai[1] = ai[0];
+
+                    int k = j + blockEnd;
+                    double tr = ar[0] * ro[k] - ai[0] * io[k];
+                    double ti = ar[0] * io[k] + ai[0] * ro[k];
+
+                    ro[k] = ro[j] - tr;
+                    io[k] = io[j] - ti;
+
+                    ro[j] += tr;
+                    io[j] += ti;
+
+                    ++j;
+                }
+            }
+
+            blockEnd = blockSize;
+        }
+    }
 };
 
-void
-D_Cross::basefft(bool inverse, const double *R__ ri, const double *R__ ii, double *R__ ro, double *R__ io)
-{
-    if (!ri || !ro || !io) return;
-
-    int i, j, k, m;
-    int blockSize, blockEnd;
-
-    double tr, ti;
-
-    double angle = 2.0 * M_PI;
-    if (inverse) angle = -angle;
-
-    const int n = m_size;
-
-    if (ii) {
-	for (i = 0; i < n; ++i) {
-	    ro[m_table[i]] = ri[i];
-        }
-	for (i = 0; i < n; ++i) {
-	    io[m_table[i]] = ii[i];
-	}
-    } else {
-	for (i = 0; i < n; ++i) {
-	    ro[m_table[i]] = ri[i];
-        }
-	for (i = 0; i < n; ++i) {
-	    io[m_table[i]] = 0.0;
-	}
-    }
-
-    blockEnd = 1;
-
-    for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
-
-	double delta = angle / (double)blockSize;
-	double sm2 = -sin(-2 * delta);
-	double sm1 = -sin(-delta);
-	double cm2 = cos(-2 * delta);
-	double cm1 = cos(-delta);
-	double w = 2 * cm1;
-	double ar[3], ai[3];
-
-	for (i = 0; i < n; i += blockSize) {
-
-	    ar[2] = cm2;
-	    ar[1] = cm1;
-
-	    ai[2] = sm2;
-	    ai[1] = sm1;
-
-	    for (j = i, m = 0; m < blockEnd; j++, m++) {
-
-		ar[0] = w * ar[1] - ar[2];
-		ar[2] = ar[1];
-		ar[1] = ar[0];
-
-		ai[0] = w * ai[1] - ai[2];
-		ai[2] = ai[1];
-		ai[1] = ai[0];
-
-		k = j + blockEnd;
-		tr = ar[0] * ro[k] - ai[0] * io[k];
-		ti = ar[0] * io[k] + ai[0] * ro[k];
-
-		ro[k] = ro[j] - tr;
-		io[k] = io[j] - ti;
-
-		ro[j] += tr;
-		io[j] += ti;
-	    }
-	}
-
-	blockEnd = blockSize;
-    }
-
-/* fftw doesn't rescale, so nor will we
-
-    if (inverse) {
-
-	double denom = (double)n;
-
-	for (i = 0; i < n; i++) {
-	    ro[i] /= denom;
-	    io[i] /= denom;
-	}
-    }
-*/
-}
-
 #endif /* USE_BUILTIN_FFT */
 
+class D_DFT : public FFTImpl
+{
+private:
+    template <typename T>
+    class DFT
+    {
+    public:
+        DFT(int size) : m_size(size), m_bins(size/2 + 1) {
+            
+            m_sin = allocate_channels<double>(m_size, m_size);
+            m_cos = allocate_channels<double>(m_size, m_size);
+
+            for (int i = 0; i < m_size; ++i) {
+                for (int j = 0; j < m_size; ++j) {
+                    double arg = (double(i) * double(j) * M_PI * 2.0) / m_size;
+                    m_sin[i][j] = sin(arg);
+                    m_cos[i][j] = cos(arg);
+                }
+            }
+
+            m_tmp = allocate_channels<double>(2, m_size);
+        }
+
+        ~DFT() {
+            deallocate_channels(m_tmp, 2);
+            deallocate_channels(m_sin, m_size);
+            deallocate_channels(m_cos, m_size);
+        }
+
+        void forward(const T *BQ_R__ realIn, T *BQ_R__ realOut, T *BQ_R__ imagOut) {
+            for (int i = 0; i < m_bins; ++i) {
+                double re = 0.0, im = 0.0;
+                for (int j = 0; j < m_size; ++j) re += realIn[j] * m_cos[i][j];
+                for (int j = 0; j < m_size; ++j) im -= realIn[j] * m_sin[i][j];
+                realOut[i] = T(re);
+                imagOut[i] = T(im);
+            }
+        }
+
+        void forwardInterleaved(const T *BQ_R__ realIn, T *BQ_R__ complexOut) {
+            for (int i = 0; i < m_bins; ++i) {
+                double re = 0.0, im = 0.0;
+                for (int j = 0; j < m_size; ++j) re += realIn[j] * m_cos[i][j];
+                for (int j = 0; j < m_size; ++j) im -= realIn[j] * m_sin[i][j];
+                complexOut[i*2] = T(re);
+                complexOut[i*2 + 1] = T(im);
+            }
+        }
+
+        void forwardPolar(const T *BQ_R__ realIn, T *BQ_R__ magOut, T *BQ_R__ phaseOut) {
+            forward(realIn, magOut, phaseOut); // temporarily
+            for (int i = 0; i < m_bins; ++i) {
+                T re = magOut[i], im = phaseOut[i];
+                c_magphase(magOut + i, phaseOut + i, re, im);
+            }
+        }
+
+        void forwardMagnitude(const T *BQ_R__ realIn, T *BQ_R__ magOut) {
+            for (int i = 0; i < m_bins; ++i) {
+                double re = 0.0, im = 0.0;
+                for (int j = 0; j < m_size; ++j) re += realIn[j] * m_cos[i][j];
+                for (int j = 0; j < m_size; ++j) im -= realIn[j] * m_sin[i][j];
+                magOut[i] = T(sqrt(re * re + im * im));
+            }
+        }
+
+        void inverse(const T *BQ_R__ realIn, const T *BQ_R__ imagIn, T *BQ_R__ realOut) {
+            for (int i = 0; i < m_bins; ++i) {
+                m_tmp[0][i] = realIn[i];
+                m_tmp[1][i] = imagIn[i];
+            }
+            for (int i = m_bins; i < m_size; ++i) {
+                m_tmp[0][i] = realIn[m_size - i];
+                m_tmp[1][i] = -imagIn[m_size - i];
+            }
+            for (int i = 0; i < m_size; ++i) {
+                double re = 0.0;
+                const double *const cos = m_cos[i];
+                const double *const sin = m_sin[i];
+                for (int j = 0; j < m_size; ++j) re += m_tmp[0][j] * cos[j];
+                for (int j = 0; j < m_size; ++j) re -= m_tmp[1][j] * sin[j];
+                realOut[i] = T(re);
+            }
+        }
+
+        void inverseInterleaved(const T *BQ_R__ complexIn, T *BQ_R__ realOut) {
+            for (int i = 0; i < m_bins; ++i) {
+                m_tmp[0][i] = complexIn[i*2];
+                m_tmp[1][i] = complexIn[i*2+1];
+            }
+            for (int i = m_bins; i < m_size; ++i) {
+                m_tmp[0][i] = complexIn[(m_size - i) * 2];
+                m_tmp[1][i] = -complexIn[(m_size - i) * 2 + 1];
+            }
+            for (int i = 0; i < m_size; ++i) {
+                double re = 0.0;
+                const double *const cos = m_cos[i];
+                const double *const sin = m_sin[i];
+                for (int j = 0; j < m_size; ++j) re += m_tmp[0][j] * cos[j];
+                for (int j = 0; j < m_size; ++j) re -= m_tmp[1][j] * sin[j];
+                realOut[i] = T(re);
+            }
+        }
+
+        void inversePolar(const T *BQ_R__ magIn, const T *BQ_R__ phaseIn, T *BQ_R__ realOut) {
+            T *complexIn = allocate<T>(m_bins * 2);
+            v_polar_to_cartesian_interleaved(complexIn, magIn, phaseIn, m_bins);
+            inverseInterleaved(complexIn, realOut);
+            deallocate(complexIn);
+        }
+
+        void inverseCepstral(const T *BQ_R__ magIn, T *BQ_R__ cepOut) {
+            T *complexIn = allocate_and_zero<T>(m_bins * 2);
+            for (int i = 0; i < m_bins; ++i) {
+                complexIn[i*2] = T(log(magIn[i] + 0.000001));
+            }
+            inverseInterleaved(complexIn, cepOut);
+            deallocate(complexIn);
+        }
+
+    private:
+        const int m_size;
+        const int m_bins;
+        double **m_sin;
+        double **m_cos;
+        double **m_tmp;
+    };
+    
+public:
+    D_DFT(int size) : m_size(size), m_double(0), m_float(0) { }
+
+    ~D_DFT() {
+        delete m_double;
+        delete m_float;
+    }
+
+    int getSize() const {
+        return m_size;
+    }
+
+    FFT::Precisions
+    getSupportedPrecisions() const {
+        return FFT::DoublePrecision;
+    }
+
+    void initFloat() {
+        if (!m_float) {
+            m_float = new DFT<float>(m_size);
+        }
+    }
+        
+    void initDouble() {
+        if (!m_double) {
+            m_double = new DFT<double>(m_size);
+        }
+    }
+
+    void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) {
+        initDouble();
+        m_double->forward(realIn, realOut, imagOut);
+    }
+
+    void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) {
+        initDouble();
+        m_double->forwardInterleaved(realIn, complexOut);
+    }
+
+    void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
+        initDouble();
+        m_double->forwardPolar(realIn, magOut, phaseOut);
+    }
+
+    void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) {
+        initDouble();
+        m_double->forwardMagnitude(realIn, magOut);
+    }
+
+    void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) {
+        initFloat();
+        m_float->forward(realIn, realOut, imagOut);
+    }
+
+    void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) {
+        initFloat();
+        m_float->forwardInterleaved(realIn, complexOut);
+    }
+
+    void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
+        initFloat();
+        m_float->forwardPolar(realIn, magOut, phaseOut);
+    }
+
+    void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) {
+        initFloat();
+        m_float->forwardMagnitude(realIn, magOut);
+    }
+
+    void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) {
+        initDouble();
+        m_double->inverse(realIn, imagIn, realOut);
+    }
+
+    void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) {
+        initDouble();
+        m_double->inverseInterleaved(complexIn, realOut);
+    }
+
+    void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) {
+        initDouble();
+        m_double->inversePolar(magIn, phaseIn, realOut);
+    }
+
+    void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) {
+        initDouble();
+        m_double->inverseCepstral(magIn, cepOut);
+    }
+
+    void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) {
+        initFloat();
+        m_float->inverse(realIn, imagIn, realOut);
+    }
+
+    void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) {
+        initFloat();
+        m_float->inverseInterleaved(complexIn, realOut);
+    }
+
+    void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) {
+        initFloat();
+        m_float->inversePolar(magIn, phaseIn, realOut);
+    }
+
+    void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) {
+        initFloat();
+        m_float->inverseCepstral(magIn, cepOut);
+    }
+
+private:
+    int m_size;
+    DFT<double> *m_double;
+    DFT<float> *m_float;
+};
+
 } /* end namespace FFTs */
 
-std::string
-FFT::m_implementation;
+enum SizeConstraint {
+    SizeConstraintNone           = 0x0,
+    SizeConstraintEven           = 0x1,
+    SizeConstraintPowerOfTwo     = 0x2,
+    SizeConstraintEvenPowerOfTwo = 0x3 // i.e. 0x1 | 0x2. Excludes size 1 obvs
+};
+
+typedef std::map<std::string, SizeConstraint> ImplMap;
+
+static std::string defaultImplementation;
+
+static ImplMap
+getImplementationDetails()
+{
+    ImplMap impls;
+    
+#ifdef HAVE_IPP
+    impls["ipp"] = SizeConstraintEvenPowerOfTwo;
+#endif
+#ifdef HAVE_FFTW3
+    impls["fftw"] = SizeConstraintNone;
+#endif
+#ifdef HAVE_KISSFFT
+    impls["kissfft"] = SizeConstraintEven;
+#endif
+#ifdef HAVE_VDSP
+    impls["vdsp"] = SizeConstraintEvenPowerOfTwo;
+#endif
+#ifdef USE_BUILTIN_FFT
+    impls["builtin"] = SizeConstraintEvenPowerOfTwo;
+#endif
+
+    impls["dft"] = SizeConstraintNone;
+
+    return impls;
+}
+
+static std::string
+pickImplementation(int size)
+{
+    ImplMap impls = getImplementationDetails();
+
+    bool isPowerOfTwo = !(size & (size-1));
+    bool isEven = !(size & 1);
+
+    if (defaultImplementation != "") {
+        ImplMap::const_iterator itr = impls.find(defaultImplementation);
+        if (itr != impls.end()) {
+            if (((itr->second & SizeConstraintPowerOfTwo) && !isPowerOfTwo) ||
+                ((itr->second & SizeConstraintEven) && !isEven)) {
+//                std::cerr << "NOTE: bqfft: Explicitly-set default "
+//                          << "implementation \"" << defaultImplementation
+//                          << "\" does not support size " << size
+//                          << ", trying other compiled-in implementations"
+//                          << std::endl;
+            } else {
+                return defaultImplementation;
+            }
+        } else {
+            std::cerr << "WARNING: bqfft: Default implementation \""
+                      << defaultImplementation << "\" is not compiled in"
+                      << std::endl;
+        }
+    } 
+    
+    std::string preference[] = {
+        "ipp", "vdsp", "fftw", "builtin", "kissfft"
+    };
+
+    for (int i = 0; i < int(sizeof(preference)/sizeof(preference[0])); ++i) {
+        ImplMap::const_iterator itr = impls.find(preference[i]);
+        if (itr != impls.end()) {
+            if ((itr->second & SizeConstraintPowerOfTwo) &&
+                // out of an abundance of caution we don't attempt to
+                // use power-of-two implementations with size 2
+                // either, as they may involve a half-half
+                // complex-complex underneath (which would end up with
+                // size 0)
+                (!isPowerOfTwo || size < 4)) {
+                continue;
+            }
+            if ((itr->second & SizeConstraintEven) && !isEven) {
+                continue;
+            }
+            return preference[i];
+        }
+    }
+
+    std::cerr << "WARNING: bqfft: No compiled-in implementation supports size "
+              << size << ", falling back to slow DFT" << std::endl;
+    
+    return "dft";
+}
 
 std::set<std::string>
 FFT::getImplementations()
 {
-    std::set<std::string> impls;
-#ifdef HAVE_IPP
-    impls.insert("ipp");
-#endif
-#ifdef HAVE_FFTW3
-    impls.insert("fftw");
-#endif
-#ifdef USE_KISSFFT
-    impls.insert("kissfft");
-#endif
-#ifdef HAVE_VDSP
-    impls.insert("vdsp");
-#endif
-#ifdef HAVE_MEDIALIB
-    impls.insert("medialib");
-#endif
-#ifdef HAVE_OPENMAX
-    impls.insert("openmax");
-#endif
-#ifdef HAVE_SFFT
-    impls.insert("sfft");
-#endif
-#ifdef USE_BUILTIN_FFT
-    impls.insert("cross");
-#endif
-    return impls;
-}
-
-void
-FFT::pickDefaultImplementation()
-{
-    if (m_implementation != "") return;
-
-    std::set<std::string> impls = getImplementations();
-
-    std::string best = "cross";
-    if (impls.find("kissfft") != impls.end()) best = "kissfft";
-    if (impls.find("medialib") != impls.end()) best = "medialib";
-    if (impls.find("openmax") != impls.end()) best = "openmax";
-    if (impls.find("sfft") != impls.end()) best = "sfft";
-    if (impls.find("fftw") != impls.end()) best = "fftw";
-    if (impls.find("vdsp") != impls.end()) best = "vdsp";
-    if (impls.find("ipp") != impls.end()) best = "ipp";
-    
-    m_implementation = best;
+    ImplMap impls = getImplementationDetails();
+    std::set<std::string> toReturn;
+    for (ImplMap::const_iterator i = impls.begin(); i != impls.end(); ++i) {
+        toReturn.insert(i->first);
+    }
+    return toReturn;
 }
 
 std::string
 FFT::getDefaultImplementation()
 {
-    return m_implementation;
+    return defaultImplementation;
 }
 
 void
 FFT::setDefaultImplementation(std::string i)
 {
-    m_implementation = i;
+    if (i == "") {
+        defaultImplementation = i;
+        return;
+    } 
+    ImplMap impls = getImplementationDetails();
+    ImplMap::const_iterator itr = impls.find(i);
+    if (itr == impls.end()) {
+        std::cerr << "WARNING: bqfft: setDefaultImplementation: "
+                  << "requested implementation \"" << i
+                  << "\" is not compiled in" << std::endl;
+    } else {
+        defaultImplementation = i;
+    }
 }
 
 FFT::FFT(int size, int debugLevel) :
     d(0)
 {
-    if ((size < 2) ||
-        (size & (size-1))) {
-        std::cerr << "FFT::FFT(" << size << "): power-of-two sizes only supported, minimum size 2" << std::endl;
-#ifndef NO_EXCEPTIONS
-        throw InvalidSize;
-#else
-        abort();
-#endif
-    }
-
-    if (m_implementation == "") pickDefaultImplementation();
-    std::string impl = m_implementation;
+    std::string impl = pickImplementation(size);
 
     if (debugLevel > 0) {
         std::cerr << "FFT::FFT(" << size << "): using implementation: "
@@ -3181,29 +2390,19 @@ FFT::FFT(int size, int debugLevel) :
         d = new FFTs::D_FFTW(size);
 #endif
     } else if (impl == "kissfft") {        
-#ifdef USE_KISSFFT
+#ifdef HAVE_KISSFFT
         d = new FFTs::D_KISSFFT(size);
 #endif
     } else if (impl == "vdsp") {
 #ifdef HAVE_VDSP
         d = new FFTs::D_VDSP(size);
 #endif
-    } else if (impl == "medialib") {
-#ifdef HAVE_MEDIALIB
-        d = new FFTs::D_MEDIALIB(size);
-#endif
-    } else if (impl == "openmax") {
-#ifdef HAVE_OPENMAX
-        d = new FFTs::D_OPENMAX(size);
-#endif
-    } else if (impl == "sfft") {
-#ifdef HAVE_SFFT
-        d = new FFTs::D_SFFT(size);
-#endif
-    } else if (impl == "cross") {
+    } else if (impl == "builtin") {
 #ifdef USE_BUILTIN_FFT
-        d = new FFTs::D_Cross(size);
+        d = new FFTs::D_Builtin(size);
 #endif
+    } else if (impl == "dft") {
+        d = new FFTs::D_DFT(size);
     }
 
     if (!d) {
@@ -3238,9 +2437,8 @@ FFT::~FFT()
 #endif
 
 void
-FFT::forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut)
+FFT::forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut)
 {
-    Profiler profiler("FFT::forward");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(realOut);
     CHECK_NOT_NULL(imagOut);
@@ -3248,18 +2446,16 @@ FFT::forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut)
 }
 
 void
-FFT::forwardInterleaved(const double *R__ realIn, double *R__ complexOut)
+FFT::forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut)
 {
-    Profiler profiler("FFT::forwardInterleaved");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(complexOut);
     d->forwardInterleaved(realIn, complexOut);
 }
 
 void
-FFT::forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut)
+FFT::forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut)
 {
-    Profiler profiler("FFT::forwardPolar");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(magOut);
     CHECK_NOT_NULL(phaseOut);
@@ -3267,18 +2463,16 @@ FFT::forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phas
 }
 
 void
-FFT::forwardMagnitude(const double *R__ realIn, double *R__ magOut)
+FFT::forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut)
 {
-    Profiler profiler("FFT::forwardMagnitude");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(magOut);
     d->forwardMagnitude(realIn, magOut);
 }
 
 void
-FFT::forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut)
+FFT::forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut)
 {
-    Profiler profiler("FFT::forward[float]");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(realOut);
     CHECK_NOT_NULL(imagOut);
@@ -3286,18 +2480,16 @@ FFT::forward(const float *R__ realIn, float *R__ realOut, float *R__ imagOut)
 }
 
 void
-FFT::forwardInterleaved(const float *R__ realIn, float *R__ complexOut)
+FFT::forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut)
 {
-    Profiler profiler("FFT::forwardInterleaved[float]");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(complexOut);
     d->forwardInterleaved(realIn, complexOut);
 }
 
 void
-FFT::forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOut)
+FFT::forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut)
 {
-    Profiler profiler("FFT::forwardPolar[float]");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(magOut);
     CHECK_NOT_NULL(phaseOut);
@@ -3305,18 +2497,16 @@ FFT::forwardPolar(const float *R__ realIn, float *R__ magOut, float *R__ phaseOu
 }
 
 void
-FFT::forwardMagnitude(const float *R__ realIn, float *R__ magOut)
+FFT::forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut)
 {
-    Profiler profiler("FFT::forwardMagnitude[float]");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(magOut);
     d->forwardMagnitude(realIn, magOut);
 }
 
 void
-FFT::inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ realOut)
+FFT::inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut)
 {
-    Profiler profiler("FFT::inverse");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(imagIn);
     CHECK_NOT_NULL(realOut);
@@ -3324,18 +2514,16 @@ FFT::inverse(const double *R__ realIn, const double *R__ imagIn, double *R__ rea
 }
 
 void
-FFT::inverseInterleaved(const double *R__ complexIn, double *R__ realOut)
+FFT::inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut)
 {
-    Profiler profiler("FFT::inverseInterleaved");
     CHECK_NOT_NULL(complexIn);
     CHECK_NOT_NULL(realOut);
     d->inverseInterleaved(complexIn, realOut);
 }
 
 void
-FFT::inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R__ realOut)
+FFT::inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut)
 {
-    Profiler profiler("FFT::inversePolar");
     CHECK_NOT_NULL(magIn);
     CHECK_NOT_NULL(phaseIn);
     CHECK_NOT_NULL(realOut);
@@ -3343,18 +2531,16 @@ FFT::inversePolar(const double *R__ magIn, const double *R__ phaseIn, double *R_
 }
 
 void
-FFT::inverseCepstral(const double *R__ magIn, double *R__ cepOut)
+FFT::inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut)
 {
-    Profiler profiler("FFT::inverseCepstral");
     CHECK_NOT_NULL(magIn);
     CHECK_NOT_NULL(cepOut);
     d->inverseCepstral(magIn, cepOut);
 }
 
 void
-FFT::inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOut)
+FFT::inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut)
 {
-    Profiler profiler("FFT::inverse[float]");
     CHECK_NOT_NULL(realIn);
     CHECK_NOT_NULL(imagIn);
     CHECK_NOT_NULL(realOut);
@@ -3362,18 +2548,16 @@ FFT::inverse(const float *R__ realIn, const float *R__ imagIn, float *R__ realOu
 }
 
 void
-FFT::inverseInterleaved(const float *R__ complexIn, float *R__ realOut)
+FFT::inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut)
 {
-    Profiler profiler("FFT::inverseInterleaved[float]");
     CHECK_NOT_NULL(complexIn);
     CHECK_NOT_NULL(realOut);
     d->inverseInterleaved(complexIn, realOut);
 }
 
 void
-FFT::inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ realOut)
+FFT::inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut)
 {
-    Profiler profiler("FFT::inversePolar[float]");
     CHECK_NOT_NULL(magIn);
     CHECK_NOT_NULL(phaseIn);
     CHECK_NOT_NULL(realOut);
@@ -3381,9 +2565,8 @@ FFT::inversePolar(const float *R__ magIn, const float *R__ phaseIn, float *R__ r
 }
 
 void
-FFT::inverseCepstral(const float *R__ magIn, float *R__ cepOut)
+FFT::inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut)
 {
-    Profiler profiler("FFT::inverseCepstral[float]");
     CHECK_NOT_NULL(magIn);
     CHECK_NOT_NULL(cepOut);
     d->inverseCepstral(magIn, cepOut);
@@ -3401,6 +2584,12 @@ FFT::initDouble()
     d->initDouble();
 }
 
+int
+FFT::getSize() const
+{
+    return d->getSize();
+}
+
 FFT::Precisions
 FFT::getSupportedPrecisions() const
 {
@@ -3409,18 +2598,27 @@ FFT::getSupportedPrecisions() const
 
 #ifdef FFT_MEASUREMENT
 
+#ifdef FFT_MEASUREMENT_RETURN_RESULT_TEXT
 std::string
+#else
+void
+#endif
 FFT::tune()
 {
+#ifdef FFT_MEASUREMENT_RETURN_RESULT_TEXT
     std::ostringstream os;
+#else
+#define os std::cerr
+#endif
     os << "FFT::tune()..." << std::endl;
 
     std::vector<int> sizes;
-    std::map<FFTImpl *, int> candidates;
-    std::map<int, int> wins;
+    std::map<std::string, FFTImpl *> candidates;
+    std::map<std::string, int> wins;
 
     sizes.push_back(512);
     sizes.push_back(1024);
+    sizes.push_back(2048);
     sizes.push_back(4096);
     
     for (unsigned int si = 0; si < sizes.size(); ++si) {
@@ -3428,18 +2626,18 @@ FFT::tune()
         int size = sizes[si];
 
         while (!candidates.empty()) {
-            delete candidates.begin()->first;
+            delete candidates.begin()->second;
             candidates.erase(candidates.begin());
         }
 
         FFTImpl *d;
         
 #ifdef HAVE_IPP
-        std::cerr << "Constructing new IPP FFT object for size " << size << "..." << std::endl;
+        os << "Constructing new IPP FFT object for size " << size << "..." << std::endl;
         d = new FFTs::D_IPP(size);
         d->initFloat();
         d->initDouble();
-        candidates[d] = 0;
+        candidates["ipp"] = d;
 #endif
         
 #ifdef HAVE_FFTW3
@@ -3447,23 +2645,23 @@ FFT::tune()
         d = new FFTs::D_FFTW(size);
         d->initFloat();
         d->initDouble();
-        candidates[d] = 1;
+        candidates["fftw"] = d;
 #endif
 
-#ifdef USE_KISSFFT
+#ifdef HAVE_KISSFFT
         os << "Constructing new KISSFFT object for size " << size << "..." << std::endl;
         d = new FFTs::D_KISSFFT(size);
         d->initFloat();
         d->initDouble();
-        candidates[d] = 2;
+        candidates["kissfft"] = d;
 #endif        
 
 #ifdef USE_BUILTIN_FFT
-        os << "Constructing new Cross FFT object for size " << size << "..." << std::endl;
-        d = new FFTs::D_Cross(size);
+        os << "Constructing new Builtin FFT object for size " << size << "..." << std::endl;
+        d = new FFTs::D_Builtin(size);
         d->initFloat();
         d->initDouble();
-        candidates[d] = 3;
+        candidates["builtin"] = d;
 #endif
         
 #ifdef HAVE_VDSP
@@ -3471,40 +2669,22 @@ FFT::tune()
         d = new FFTs::D_VDSP(size);
         d->initFloat();
         d->initDouble();
-        candidates[d] = 4;
+        candidates["vdsp"] = d;
 #endif
-        
-#ifdef HAVE_MEDIALIB
-        std::cerr << "Constructing new MediaLib FFT object for size " << size << "..." << std::endl;
-        d = new FFTs::D_MEDIALIB(size);
+
+        os << "Constructing new DFT object for size " << size << "..." << std::endl;
+        d = new FFTs::D_DFT(size);
         d->initFloat();
         d->initDouble();
-        candidates[d] = 5;
-#endif
-        
-#ifdef HAVE_OPENMAX
-        os << "Constructing new OpenMAX FFT object for size " << size << "..." << std::endl;
-        d = new FFTs::D_OPENMAX(size);
-        d->initFloat();
-        d->initDouble();
-        candidates[d] = 6;
-#endif
-        
-#ifdef HAVE_SFFT
-        os << "Constructing new SFFT FFT object for size " << size << "..." << std::endl;
-        d = new FFTs::D_SFFT(size);
-//        d->initFloat();
-        d->initDouble();
-        candidates[d] = 6;
-#endif
+        candidates["dft"] = d;
 
         os << "CLOCKS_PER_SEC = " << CLOCKS_PER_SEC << std::endl;
         float divisor = float(CLOCKS_PER_SEC) / 1000.f;
         
         os << "Timing order is: ";
-        for (std::map<FFTImpl *, int>::iterator ci = candidates.begin();
+        for (std::map<std::string, FFTImpl *>::iterator ci = candidates.begin();
              ci != candidates.end(); ++ci) {
-            os << ci->second << " ";
+            os << ci->first << " ";
         }
         os << std::endl;
 
@@ -3556,8 +2736,8 @@ FFT::tune()
                 fi[i] = di[i];
             }
 
-            int low = -1;
-            int lowscore = 0;
+            std::string low;
+            clock_t lowscore = 0;
 
             const char *names[] = {
 
@@ -3581,10 +2761,10 @@ FFT::tune()
             };
             os << names[type] << " :: ";
 
-            for (std::map<FFTImpl *, int>::iterator ci = candidates.begin();
+            for (std::map<std::string, FFTImpl *>::iterator ci = candidates.begin();
                  ci != candidates.end(); ++ci) {
 
-                FFTImpl *d = ci->first;
+                FFTImpl *d = ci->second;
 
                 double mean = 0;
 
@@ -3640,8 +2820,8 @@ FFT::tune()
 
                 os << float(end - start)/divisor << " (" << mean << ") ";
 
-                if (low == -1 || (end - start) < lowscore) {
-                    low = ci->second;
+                if (low == "" || (end - start) < lowscore) {
+                    low = ci->first;
                     lowscore = end - start;
                 }
             }
@@ -3664,15 +2844,15 @@ FFT::tune()
     }
 
     while (!candidates.empty()) {
-        delete candidates.begin()->first;
+        delete candidates.begin()->second;
         candidates.erase(candidates.begin());
     }
 
     int bestscore = 0;
-    int best = -1;
+    std::string best;
 
-    for (std::map<int, int>::iterator wi = wins.begin(); wi != wins.end(); ++wi) {
-        if (best == -1 || wi->second > bestscore) {
+    for (std::map<std::string, int>::iterator wi = wins.begin(); wi != wins.end(); ++wi) {
+        if (best == "" || wi->second > bestscore) {
             best = wi->first;
             bestscore = wi->second;
         }
@@ -3680,7 +2860,9 @@ FFT::tune()
 
     os << "overall winner is " << best << " with " << bestscore << " wins" << std::endl;
 
+#ifdef FFT_MEASUREMENT_RETURN_RESULT_TEXT
     return os.str();
+#endif
 }
 
 #endif
diff --git a/src/dsp/FFT.h b/src/dsp/FFT.h
index 55c763b..fbf36a0 100644
--- a/src/dsp/FFT.h
+++ b/src/dsp/FFT.h
@@ -64,6 +64,8 @@ public:
     FFT(int size, int debugLevel = 0); // may throw InvalidSize
     ~FFT();
 
+    int getSize() const;
+    
     void forward(const double *R__ realIn, double *R__ realOut, double *R__ imagOut);
     void forwardInterleaved(const double *R__ realIn, double *R__ complexOut);
     void forwardPolar(const double *R__ realIn, double *R__ magOut, double *R__ phaseOut);
@@ -121,6 +123,10 @@ protected:
     FFTImpl *d;
     static std::string m_implementation;
     static void pickDefaultImplementation();
+    
+private:
+    FFT(const FFT &); // not provided
+    FFT &operator=(const FFT &); // not provided
 };
 
 }
diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp
index 06199a4..dda917d 100644
--- a/src/dsp/Resampler.cpp
+++ b/src/dsp/Resampler.cpp
@@ -1,4 +1,4 @@
-/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
 
 /*
     Rubber Band Library
@@ -22,7 +22,9 @@
 */
 
 #include "Resampler.h"
-#include "base/Profiler.h"
+
+#include "system/Allocators.h"
+#include "system/VectorOps.h"
 
 #include <cstdlib>
 #include <cmath>
@@ -30,9 +32,6 @@
 #include <iostream>
 #include <algorithm>
 
-#include "system/Allocators.h"
-#include "system/VectorOps.h"
-
 #ifdef HAVE_IPP
 #include <ippversion.h>
 #if (IPP_VERSION_MAJOR < 7)
@@ -42,6 +41,10 @@
 #endif
 #endif
 
+#ifdef HAVE_SAMPLERATE
+#define HAVE_LIBSAMPLERATE 1
+#endif
+
 #ifdef HAVE_LIBSAMPLERATE
 #include <samplerate.h>
 #endif
@@ -51,18 +54,26 @@
 #endif
 
 #ifdef USE_SPEEX
-#include "speex/speex_resampler.h"
+#include "../speex/speex_resampler.h"
+#endif
+
+#ifdef USE_BQRESAMPLER
+#include "BQResampler.h"
 #endif
 
 #ifndef HAVE_IPP
 #ifndef HAVE_LIBSAMPLERATE
 #ifndef HAVE_LIBRESAMPLE
 #ifndef USE_SPEEX
+#ifndef USE_BQRESAMPLER
 #error No resampler implementation selected!
 #endif
 #endif
 #endif
 #endif
+#endif
+
+#define BQ_R__ R__
 
 using namespace std;
 
@@ -73,16 +84,16 @@ class Resampler::Impl
 public:
     virtual ~Impl() { }
     
-    virtual int resample(float *const R__ *const R__ out,
+    virtual int resample(float *const BQ_R__ *const BQ_R__ out,
                          int outcount,
-                         const float *const R__ *const R__ in, 
+                         const float *const BQ_R__ *const BQ_R__ in, 
                          int incount,
                          double ratio,
                          bool final) = 0;
     
-    virtual int resampleInterleaved(float *const R__ out,
+    virtual int resampleInterleaved(float *const BQ_R__ out,
                                     int outcount,
-                                    const float *const R__ in, 
+                                    const float *const BQ_R__ in, 
                                     int incount,
                                     double ratio,
                                     bool final) = 0;
@@ -99,20 +110,21 @@ namespace Resamplers {
 class D_IPP : public Resampler::Impl
 {
 public:
-    D_IPP(Resampler::Quality quality, int channels, double initialSampleRate,
+    D_IPP(Resampler::Quality quality, Resampler::RatioChange,
+          int channels, double initialSampleRate,
           int maxBufferSize, int debugLevel);
     ~D_IPP();
 
-    int resample(float *const R__ *const R__ out,
+    int resample(float *const BQ_R__ *const BQ_R__ out,
                  int outcount,
-                 const float *const R__ *const R__ in,
+                 const float *const BQ_R__ *const BQ_R__ in,
                  int incount,
                  double ratio,
                  bool final);
 
-    int resampleInterleaved(float *const R__ out,
+    int resampleInterleaved(float *const BQ_R__ out,
                             int outcount,
-                            const float *const R__ in,
+                            const float *const BQ_R__ in,
                             int incount,
                             double ratio,
                             bool final = false);
@@ -144,6 +156,7 @@ protected:
 };
 
 D_IPP::D_IPP(Resampler::Quality /* quality */,
+             Resampler::RatioChange /* ratioChange */,
              int channels, double initialSampleRate,
              int maxBufferSize, int debugLevel) :
     m_state(0),
@@ -152,7 +165,7 @@ D_IPP::D_IPP(Resampler::Quality /* quality */,
     m_debugLevel(debugLevel)
 {
     if (m_debugLevel > 0) {
-        cerr << "Resampler::Resampler: using IPP implementation" << endl;
+        cerr << "Resampler::Resampler: using implementation: IPP" << endl;
     }
 
     m_window = 32;
@@ -190,7 +203,7 @@ D_IPP::D_IPP(Resampler::Quality /* quality */,
     setBufSize(maxBufferSize + m_history);
 
     if (m_debugLevel > 1) {
-        cerr << "D_IPP: bufsize = " << m_bufsize << ", window = " << m_window << ", nStep = " << nStep << ", history = " << m_history << endl;
+        cerr << "bufsize = " << m_bufsize << ", window = " << m_window << ", nStep = " << nStep << ", history = " << m_history << endl;
     }
 
     int specSize = 0;
@@ -214,18 +227,13 @@ D_IPP::D_IPP(Resampler::Quality /* quality */,
                                       9.0f,
                                       m_state[c],
                                       hint);
-
-        if (m_debugLevel > 1) {
-            cerr << "D_IPP: Resampler state size = " << specSize << ", allocated at "
-                 << m_state[c] << endl;
-        }
         
         m_lastread[c] = m_history;
         m_time[c] = m_history;
     }
 
     if (m_debugLevel > 1) {
-        cerr << "D_IPP: Resampler init done" << endl;
+        cerr << "Resampler init done" << endl;
     }
 }
 
@@ -248,9 +256,9 @@ D_IPP::setBufSize(int sz)
 {
     if (m_debugLevel > 1) {
         if (m_bufsize > 0) {
-            cerr << "D_IPP: resize bufsize " << m_bufsize << " -> ";
+            cerr << "resize bufsize " << m_bufsize << " -> ";
         } else {
-            cerr << "D_IPP: initialise bufsize to ";
+            cerr << "initialise bufsize to ";
         }
     }
 
@@ -263,13 +271,13 @@ D_IPP::setBufSize(int sz)
     int n1 = m_bufsize + m_history + 2;
 
     if (m_debugLevel > 1) {
-        cerr << "D_IPP: inbuf allocating " << m_bufsize << " + " << m_history << " + 2 = " << n1 << endl;
+        cerr << "inbuf allocating " << m_bufsize << " + " << m_history << " + 2 = " << n1 << endl;
     }
 
     int n2 = (int)lrintf(ceil((m_bufsize - m_history) * m_factor + 2));
 
     if (m_debugLevel > 1) {
-        cerr << "D_IPP: outbuf allocating (" << m_bufsize << " - " << m_history << ") * " << m_factor << " + 2 = " << n2 << endl;
+        cerr << "outbuf allocating (" << m_bufsize << " - " << m_history << ") * " << m_factor << " + 2 = " << n2 << endl;
     }
 
     m_inbuf = reallocate_and_zero_extend_channels
@@ -277,30 +285,15 @@ D_IPP::setBufSize(int sz)
 
     m_outbuf = reallocate_and_zero_extend_channels
         (m_outbuf, m_channels, m_outbufsz, m_channels, n2);
-    
+            
     m_inbufsz = n1;
     m_outbufsz = n2;
-
-    if (m_debugLevel > 2) {
-
-        cerr << "D_IPP: inbuf ptr = " << m_inbuf << ", channel inbufs ";
-        for (int c = 0; c < m_channels; ++c) {
-            cerr << m_inbuf[c] << " ";
-        }
-        cerr << "at " << m_inbufsz * sizeof(float) << " bytes each" << endl;
-
-        cerr << "D_IPP: outbuf ptr = " << m_outbuf << ", channel outbufs ";
-        for (int c = 0; c < m_channels; ++c) {
-            cerr << m_outbuf[c] << " ";
-        }
-        cerr << "at " << m_outbufsz * sizeof(float) << " bytes each" << endl;
-    }        
 }
 
 int
-D_IPP::resample(float *const R__ *const R__ out,
+D_IPP::resample(float *const BQ_R__ *const BQ_R__ out,
                 int outspace,
-                const float *const R__ *const R__ in,
+                const float *const BQ_R__ *const BQ_R__ in,
                 int incount,
                 double ratio,
                 bool final)
@@ -311,7 +304,7 @@ D_IPP::resample(float *const R__ *const R__ out,
     }
 
     if (m_debugLevel > 2) {
-        cerr << "D_IPP: incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl;
+        cerr << "incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl;
     }
 
     for (int c = 0; c < m_channels; ++c) {
@@ -328,7 +321,7 @@ D_IPP::resample(float *const R__ *const R__ out,
     }
 
     if (m_debugLevel > 2) {
-        cerr << "D_IPP: lastread advanced to " << m_lastread[0] << endl;
+        cerr << "lastread advanced to " << m_lastread[0] << endl;
     }
 
     int got = doResample(outspace, ratio, final);
@@ -341,9 +334,9 @@ D_IPP::resample(float *const R__ *const R__ out,
 }
 
 int
-D_IPP::resampleInterleaved(float *const R__ out,
+D_IPP::resampleInterleaved(float *const BQ_R__ out,
                            int outspace,
-                           const float *const R__ in,
+                           const float *const BQ_R__ in,
                            int incount,
                            double ratio,
                            bool final)
@@ -354,7 +347,7 @@ D_IPP::resampleInterleaved(float *const R__ out,
     }
 
     if (m_debugLevel > 2) {
-        cerr << "D_IPP: incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl;
+        cerr << "incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl;
     }
 
     for (int c = 0; c < m_channels; ++c) {
@@ -371,7 +364,7 @@ D_IPP::resampleInterleaved(float *const R__ out,
     }
 
     if (m_debugLevel > 2) {
-        cerr << "D_IPP: lastread advanced to " << m_lastread[0] << " after injection of "
+        cerr << "lastread advanced to " << m_lastread[0] << " after injection of "
              << incount << " samples" << endl;
     }
 
@@ -392,20 +385,20 @@ D_IPP::doResample(int outspace, double ratio, bool final)
         int n = m_lastread[c] - m_history - int(m_time[c]);
 
         if (c == 0 && m_debugLevel > 2) {
-            cerr << "D_IPP: at start, lastread = " << m_lastread[c] << ", history = "
+            cerr << "at start, lastread = " << m_lastread[c] << ", history = "
                  << m_history << ", time = " << m_time[c] << ", therefore n = "
                  << n << endl;
         }
 
         if (n <= 0) {
             if (c == 0 && m_debugLevel > 1) {
-                cerr << "D_IPP: not enough input samples to do anything" << endl;
+                cerr << "not enough input samples to do anything" << endl;
             }
             continue;
         }
         
         if (c == 0 && m_debugLevel > 2) {
-            cerr << "D_IPP: before resample call, time = " << m_time[c] << endl;
+            cerr << "before resample call, time = " << m_time[c] << endl;
         }
 
         // We're committed to not overrunning outspace, so we need to
@@ -414,7 +407,7 @@ D_IPP::doResample(int outspace, double ratio, bool final)
         int limit = int(floor(outspace / ratio));
         if (n > limit) {
             if (c == 0 && m_debugLevel > 1) {
-                cerr << "D_IPP: trimming input samples from " << n << " to " << limit
+                cerr << "trimming input samples from " << n << " to " << limit
                      << " to avoid overrunning " << outspace << " at output"
                      << endl;
             }
@@ -431,26 +424,26 @@ D_IPP::doResample(int outspace, double ratio, bool final)
                                   m_state[c]);
 
         int t = int(floor(m_time[c]));
-
+        
         int moveFrom = t - m_history;
-
+        
         if (c == 0 && m_debugLevel > 2) {
-            cerr << "D_IPP: converted " << n << " samples to " << outcount
+            cerr << "converted " << n << " samples to " << outcount
                  << " (nb outbufsz = " << m_outbufsz
                  << "), time advanced to " << m_time[c] << endl;
-            cerr << "D_IPP: rounding time to " << t << ", lastread = "
+            cerr << "rounding time to " << t << ", lastread = "
                  << m_lastread[c] << ", history = " << m_history << endl;
-            cerr << "D_IPP: will move " << m_lastread[c] - moveFrom
+            cerr << "will move " << m_lastread[c] - moveFrom
                  << " unconverted samples back from index " << moveFrom
                  << " to 0" << endl;
         }
-        
+
         if (moveFrom >= m_lastread[c]) {
 
             moveFrom = m_lastread[c];
 
             if (c == 0 && m_debugLevel > 2) {
-                cerr << "D_IPP: number of samples to move is <= 0, "
+                cerr << "number of samples to move is <= 0, "
                      << "not actually moving any" << endl;
             }
         } else {
@@ -464,7 +457,7 @@ D_IPP::doResample(int outspace, double ratio, bool final)
         m_time[c] -= moveFrom;
 
         if (c == 0 && m_debugLevel > 2) {
-            cerr << "D_IPP: lastread reduced to " << m_lastread[c]
+            cerr << "lastread reduced to " << m_lastread[c]
                  << ", time reduced to " << m_time[c]
                  << endl;
         }
@@ -483,7 +476,7 @@ D_IPP::doResample(int outspace, double ratio, bool final)
             int additionalcount = 0;
 
             if (c == 0 && m_debugLevel > 2) {
-                cerr << "D_IPP: final call, padding input with " << m_history
+                cerr << "final call, padding input with " << m_history
                      << " zeros (symmetrical with m_history)" << endl;
             }
             
@@ -492,14 +485,14 @@ D_IPP::doResample(int outspace, double ratio, bool final)
             }
 
             if (c == 0 && m_debugLevel > 2) {
-                cerr << "D_IPP: before resample call, time = " << m_time[c] << endl;
+                cerr << "before resample call, time = " << m_time[c] << endl;
             }
 
             int nAdditional = m_lastread[c] - int(m_time[c]);
 
             if (n + nAdditional > limit) {
                 if (c == 0 && m_debugLevel > 1) {
-                    cerr << "D_IPP: trimming final input samples from " << nAdditional
+                    cerr << "trimming final input samples from " << nAdditional
                          << " to " << (limit - n)
                          << " to avoid overrunning " << outspace << " at output"
                          << endl;
@@ -517,9 +510,9 @@ D_IPP::doResample(int outspace, double ratio, bool final)
                                       m_state[c]);
         
             if (c == 0 && m_debugLevel > 2) {
-                cerr << "D_IPP: converted " << n << " samples to " << additionalcount
+                cerr << "converted " << n << " samples to " << additionalcount
                      << ", time advanced to " << m_time[c] << endl;
-                cerr << "D_IPP: outcount = " << outcount << ", additionalcount = " << additionalcount << ", sum " << outcount + additionalcount << endl;
+                cerr << "outcount = " << outcount << ", additionalcount = " << additionalcount << ", sum " << outcount + additionalcount << endl;
             }
 
             if (c == 0) {
@@ -529,7 +522,7 @@ D_IPP::doResample(int outspace, double ratio, bool final)
     }
 
     if (m_debugLevel > 2) {
-        cerr << "D_IPP: returning " << outcount << " samples" << endl;
+        cerr << "returning " << outcount << " samples" << endl;
     }
     
     return outcount;
@@ -548,20 +541,21 @@ D_IPP::reset()
 class D_SRC : public Resampler::Impl
 {
 public:
-    D_SRC(Resampler::Quality quality, int channels, double initialSampleRate,
+    D_SRC(Resampler::Quality quality, Resampler::RatioChange ratioChange,
+          int channels, double initialSampleRate,
           int maxBufferSize, int m_debugLevel);
     ~D_SRC();
 
-    int resample(float *const R__ *const R__ out,
+    int resample(float *const BQ_R__ *const BQ_R__ out,
                  int outcount,
-                 const float *const R__ *const R__ in,
+                 const float *const BQ_R__ *const BQ_R__ in,
                  int incount,
                  double ratio,
                  bool final);
 
-    int resampleInterleaved(float *const R__ out,
+    int resampleInterleaved(float *const BQ_R__ out,
                             int outcount,
-                            const float *const R__ in,
+                            const float *const BQ_R__ in,
                             int incount,
                             double ratio,
                             bool final = false);
@@ -579,11 +573,12 @@ protected:
     int m_ioutsize;
     double m_prevRatio;
     bool m_ratioUnset;
+    bool m_smoothRatios;
     int m_debugLevel;
 };
 
-D_SRC::D_SRC(Resampler::Quality quality, int channels, double,
-             int maxBufferSize, int debugLevel) :
+D_SRC::D_SRC(Resampler::Quality quality, Resampler::RatioChange ratioChange,
+             int channels, double, int maxBufferSize, int debugLevel) :
     m_src(0),
     m_iin(0),
     m_iout(0),
@@ -592,25 +587,41 @@ D_SRC::D_SRC(Resampler::Quality quality, int channels, double,
     m_ioutsize(0),
     m_prevRatio(1.0),
     m_ratioUnset(true),
+    m_smoothRatios(ratioChange == Resampler::SmoothRatioChange),
     m_debugLevel(debugLevel)
 {
     if (m_debugLevel > 0) {
-        cerr << "Resampler::Resampler: using libsamplerate implementation"
-                  << endl;
+        cerr << "Resampler::Resampler: using implementation: libsamplerate"
+             << endl;
     }
 
+    if (channels < 1) {
+        cerr << "Resampler::Resampler: unable to create resampler: invalid channel count " << channels << " supplied" << endl;
+#ifdef NO_EXCEPTIONS
+        throw Resampler::ImplementationError;
+#endif
+        return;
+    }
+    
     int err = 0;
     m_src = src_new(quality == Resampler::Best ? SRC_SINC_BEST_QUALITY :
-                    quality == Resampler::Fastest ? SRC_LINEAR :
-                    SRC_SINC_FASTEST,
+                    quality == Resampler::Fastest ? SRC_SINC_FASTEST :
+                    SRC_SINC_MEDIUM_QUALITY,
                     channels, &err);
 
     if (err) {
         cerr << "Resampler::Resampler: failed to create libsamplerate resampler: " 
-                  << src_strerror(err) << endl;
+             << src_strerror(err) << endl;
 #ifndef NO_EXCEPTIONS
         throw Resampler::ImplementationError;
 #endif
+        return;
+    } else if (!m_src) {
+        cerr << "Resampler::Resampler: failed to create libsamplerate resampler, but no error reported?" << endl;
+#ifndef NO_EXCEPTIONS
+        throw Resampler::ImplementationError;
+#endif
+        return;
     }
 
     if (maxBufferSize > 0 && m_channels > 1) {
@@ -631,9 +642,9 @@ D_SRC::~D_SRC()
 }
 
 int
-D_SRC::resample(float *const R__ *const R__ out,
+D_SRC::resample(float *const BQ_R__ *const BQ_R__ out,
                 int outcount,
-                const float *const R__ *const R__ in,
+                const float *const BQ_R__ *const BQ_R__ in,
                 int incount,
                 double ratio,
                 bool final)
@@ -661,15 +672,15 @@ D_SRC::resample(float *const R__ *const R__ out,
 }
 
 int
-D_SRC::resampleInterleaved(float *const R__ out,
+D_SRC::resampleInterleaved(float *const BQ_R__ out,
                            int outcount,
-                           const float *const R__ in,
+                           const float *const BQ_R__ in,
                            int incount,
                            double ratio,
                            bool final)
 {
     SRC_DATA data;
-        
+
     // libsamplerate smooths the filter change over the duration of
     // the processing block to avoid artifacts due to sudden changes,
     // and it uses outcount to determine how long to smooth the change
@@ -682,7 +693,7 @@ D_SRC::resampleInterleaved(float *const R__ out,
         outcount = int(ceil(incount * ratio) + 5);
     }
 
-    if (m_ratioUnset) {
+    if (m_ratioUnset || !m_smoothRatios) {
 
         // The first time we set a ratio, we want to do it directly
         src_set_ratio(m_src, ratio);
@@ -724,10 +735,9 @@ D_SRC::resampleInterleaved(float *const R__ out,
 
     data.input_frames = incount;
     data.output_frames = outcount;
-    
     data.src_ratio = ratio;
     data.end_of_input = (final ? 1 : 0);
-    
+
     int err = src_process(m_src, &data);
 
     if (err) {
@@ -737,7 +747,7 @@ D_SRC::resampleInterleaved(float *const R__ out,
         throw Resampler::ImplementationError;
 #endif
     }
-    
+
     return (int)data.output_frames_gen;
 }
 
@@ -755,20 +765,21 @@ D_SRC::reset()
 class D_Resample : public Resampler::Impl
 {
 public:
-    D_Resample(Resampler::Quality quality, int channels, double initialSampleRate,
+    D_Resample(Resampler::Quality quality, Resampler::RatioChange,
+               int channels, double initialSampleRate,
                int maxBufferSize, int m_debugLevel);
     ~D_Resample();
 
-    int resample(float *const R__ *const R__ out,
+    int resample(float *const BQ_R__ *const BQ_R__ out,
                  int outcount,
-                 const float *const R__ *const R__ in,
+                 const float *const BQ_R__ *const BQ_R__ in,
                  int incount,
                  double ratio,
                  bool final);
 
-    int resampleInterleaved(float *const R__ out,
+    int resampleInterleaved(float *const BQ_R__ out,
                             int outcount,
-                            const float *const R__ in,
+                            const float *const BQ_R__ in,
                             int incount,
                             double ratio,
                             bool final);
@@ -799,7 +810,7 @@ D_Resample::D_Resample(Resampler::Quality quality,
     m_debugLevel(debugLevel)
 {
     if (m_debugLevel > 0) {
-        cerr << "Resampler::Resampler: using libresample implementation"
+        cerr << "Resampler::Resampler: using implementation: libresample"
                   << endl;
     }
 
@@ -836,9 +847,9 @@ D_Resample::~D_Resample()
 }
 
 int
-D_Resample::resample(float *const R__ *const R__ out,
+D_Resample::resample(float *const BQ_R__ *const BQ_R__ out,
                      int outcount,
-                     const float *const R__ *const R__ in,
+                     const float *const BQ_R__ *const BQ_R__ in,
                      int incount,
                      double ratio,
                      bool final)
@@ -895,9 +906,9 @@ D_Resample::resample(float *const R__ *const R__ out,
 }
 
 int
-D_Resample::resampleInterleaved(float *const R__ out,
+D_Resample::resampleInterleaved(float *const BQ_R__ out,
                                 int outcount,
-                                const float *const R__ in,
+                                const float *const BQ_R__ in,
                                 int incount,
                                 double ratio,
                                 bool final)
@@ -937,25 +948,174 @@ D_Resample::reset()
 
 #endif /* HAVE_LIBRESAMPLE */
 
+#ifdef USE_BQRESAMPLER
+    
+class D_BQResampler : public Resampler::Impl
+{
+public:
+    D_BQResampler(Resampler::Parameters params, int channels);
+    ~D_BQResampler();
+
+    int resample(float *const BQ_R__ *const BQ_R__ out,
+                 int outcount,
+                 const float *const BQ_R__ *const BQ_R__ in,
+                 int incount,
+                 double ratio,
+                 bool final);
+
+    int resampleInterleaved(float *const BQ_R__ out,
+                            int outcount,
+                            const float *const BQ_R__ in,
+                            int incount,
+                            double ratio,
+                            bool final = false);
+
+    int getChannelCount() const { return m_channels; }
+
+    void reset();
+
+protected:
+    BQResampler *m_resampler;
+    float *m_iin;
+    float *m_iout;
+    int m_channels;
+    int m_iinsize;
+    int m_ioutsize;
+    int m_debugLevel;
+};
+
+D_BQResampler::D_BQResampler(Resampler::Parameters params, int channels) :
+    m_resampler(0),
+    m_iin(0),
+    m_iout(0),
+    m_channels(channels),
+    m_iinsize(0),
+    m_ioutsize(0),
+    m_debugLevel(params.debugLevel)
+{
+    if (m_debugLevel > 0) {
+        cerr << "Resampler::Resampler: using implementation: BQResampler" << endl;
+    }
+
+    BQResampler::Parameters rparams;
+    switch (params.quality) {
+    case Resampler::Best:
+        rparams.quality = BQResampler::Best;
+        break;
+    case Resampler::FastestTolerable:
+        rparams.quality = BQResampler::FastestTolerable;
+        break;
+    case Resampler::Fastest:
+        rparams.quality = BQResampler::Fastest;
+        break;
+    }
+    switch (params.dynamism) {
+    case Resampler::RatioOftenChanging:
+        rparams.dynamism = BQResampler::RatioOftenChanging;
+        break;
+    case Resampler::RatioMostlyFixed:
+        rparams.dynamism = BQResampler::RatioMostlyFixed;
+        break;
+    }
+    switch (params.ratioChange) {
+    case Resampler::SmoothRatioChange:
+        rparams.ratioChange = BQResampler::SmoothRatioChange;
+        break;
+    case Resampler::SuddenRatioChange:
+        rparams.ratioChange = BQResampler::SuddenRatioChange;
+        break;
+    }
+    rparams.referenceSampleRate = params.initialSampleRate;
+    rparams.debugLevel = params.debugLevel;
+
+    m_resampler = new BQResampler(rparams, m_channels);
+    
+    if (params.maxBufferSize > 0 && m_channels > 1) {
+        m_iinsize = params.maxBufferSize * m_channels;
+        m_ioutsize = params.maxBufferSize * m_channels * 2;
+        m_iin = allocate<float>(m_iinsize);
+        m_iout = allocate<float>(m_ioutsize);
+    }
+}
+
+D_BQResampler::~D_BQResampler()
+{
+    delete m_resampler;
+    deallocate(m_iin);
+    deallocate(m_iout);
+}
+
+int
+D_BQResampler::resample(float *const BQ_R__ *const BQ_R__ out,
+                        int outcount,
+                        const float *const BQ_R__ *const BQ_R__ in,
+                        int incount,
+                        double ratio,
+                        bool final)
+{
+    if (m_channels == 1) {
+        return resampleInterleaved(*out, outcount, *in, incount, ratio, final);
+    }
+
+    if (incount * m_channels > m_iinsize) {
+        m_iin = reallocate<float>(m_iin, m_iinsize, incount * m_channels);
+        m_iinsize = incount * m_channels;
+    }
+    if (outcount * m_channels > m_ioutsize) {
+        m_iout = reallocate<float>(m_iout, m_ioutsize, outcount * m_channels);
+        m_ioutsize = outcount * m_channels;
+    }
+    
+    v_interleave(m_iin, in, m_channels, incount);
+    
+    int n = resampleInterleaved(m_iout, outcount, m_iin, incount, ratio, final);
+
+    v_deinterleave(out, m_iout, m_channels, n);
+
+    return n;
+}
+
+int
+D_BQResampler::resampleInterleaved(float *const BQ_R__ out,
+                                   int outcount,
+                                   const float *const BQ_R__ in,
+                                   int incount,
+                                   double ratio,
+                                   bool final)
+{
+    return m_resampler->resampleInterleaved(out, outcount,
+                                            in, incount,
+                                            ratio, final);
+}
+
+void
+D_BQResampler::reset()
+{
+    m_resampler->reset();
+}
+
+#endif /* USE_BQRESAMPLER */
+
 #ifdef USE_SPEEX
     
 class D_Speex : public Resampler::Impl
 {
 public:
-    D_Speex(Resampler::Quality quality, int channels, double initialSampleRate,
+    D_Speex(Resampler::Quality quality, Resampler::RatioChange,
+            int channels, double initialSampleRate,
             int maxBufferSize, int debugLevel);
     ~D_Speex();
 
-    int resample(float *const R__ *const R__ out,
+    int resample(float *const BQ_R__ *const BQ_R__ out,
                  int outcount,
-                 const float *const R__ *const R__ in,
+                 const float *const BQ_R__ *const BQ_R__ in,
                  int incount,
                  double ratio,
                  bool final);
 
-    int resampleInterleaved(float *const R__ out,
+    int resampleInterleaved(float *const BQ_R__ out,
                             int outcount,
-                            const float *const R__ in,
+                            const float *const BQ_R__ in,
                             int incount,
                             double ratio,
                             bool final = false);
@@ -982,7 +1142,7 @@ protected:
                     double ratio, bool final);
 };
 
-D_Speex::D_Speex(Resampler::Quality quality,
+D_Speex::D_Speex(Resampler::Quality quality, Resampler::RatioChange,
                  int channels, double initialSampleRate,
                  int maxBufferSize, int debugLevel) :
     m_resampler(0),
@@ -1000,7 +1160,7 @@ D_Speex::D_Speex(Resampler::Quality quality,
              quality == Resampler::Fastest ? 0 : 4);
 
     if (m_debugLevel > 0) {
-        cerr << "Resampler::Resampler: using Speex implementation with q = "
+        cerr << "Resampler::Resampler: using implementation: Speex with q = "
              << q << endl;
     }
 
@@ -1093,9 +1253,9 @@ D_Speex::setRatio(double ratio)
 }
 
 int
-D_Speex::resample(float *const R__ *const R__ out,
+D_Speex::resample(float *const BQ_R__ *const BQ_R__ out,
                   int outcount,
-                  const float *const R__ *const R__ in,
+                  const float *const BQ_R__ *const BQ_R__ in,
                   int incount,
                   double ratio,
                   bool final)
@@ -1136,9 +1296,9 @@ D_Speex::resample(float *const R__ *const R__ out,
 }
 
 int
-D_Speex::resampleInterleaved(float *const R__ out,
+D_Speex::resampleInterleaved(float *const BQ_R__ out,
                              int outcount,
-                             const float *const R__ in,
+                             const float *const BQ_R__ in,
                              int incount,
                              double ratio,
                              bool final)
@@ -1236,6 +1396,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
 #ifdef HAVE_LIBRESAMPLE
         m_method = 3;
 #endif
+#ifdef USE_BQRESAMPLER
+        m_method = 4;
+#endif
 #ifdef HAVE_LIBSAMPLERATE
         m_method = 1;
 #endif
@@ -1251,6 +1414,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
 #ifdef USE_SPEEX
         m_method = 2;
 #endif
+#ifdef USE_BQRESAMPLER
+        m_method = 4;
+#endif
 #ifdef HAVE_LIBSAMPLERATE
         m_method = 1;
 #endif
@@ -1266,6 +1432,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
 #ifdef USE_SPEEX
         m_method = 2;
 #endif
+#ifdef USE_BQRESAMPLER
+        m_method = 4;
+#endif
 #ifdef HAVE_LIBSAMPLERATE
         m_method = 1;
 #endif
@@ -1281,7 +1450,7 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
     case 0:
 #ifdef HAVE_IPP
         d = new Resamplers::D_IPP
-            (params.quality,
+            (params.quality, params.ratioChange,
              channels,
              params.initialSampleRate, params.maxBufferSize, params.debugLevel);
 #else
@@ -1293,7 +1462,7 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
     case 1:
 #ifdef HAVE_LIBSAMPLERATE
         d = new Resamplers::D_SRC
-            (params.quality,
+            (params.quality, params.ratioChange,
              channels,
              params.initialSampleRate, params.maxBufferSize, params.debugLevel);
 #else
@@ -1305,7 +1474,7 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
     case 2:
 #ifdef USE_SPEEX
         d = new Resamplers::D_Speex
-            (params.quality,
+            (params.quality, params.ratioChange,
              channels,
              params.initialSampleRate, params.maxBufferSize, params.debugLevel);
 #else
@@ -1317,12 +1486,21 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
     case 3:
 #ifdef HAVE_LIBRESAMPLE
         d = new Resamplers::D_Resample
-            (params.quality,
+            (params.quality, params.ratioChange,
              channels,
              params.initialSampleRate, params.maxBufferSize, params.debugLevel);
 #else
         cerr << "Resampler::Resampler: No implementation available!" << endl;
         abort();
+#endif
+        break;
+
+    case 4:
+#ifdef USE_BQRESAMPLER
+        d = new Resamplers::D_BQResampler(params, channels);
+#else
+        cerr << "Resampler::Resampler: No implementation available!" << endl;
+        abort();
 #endif
         break;
     }
@@ -1340,26 +1518,24 @@ Resampler::~Resampler()
 }
 
 int 
-Resampler::resample(float *const R__ *const R__ out,
+Resampler::resample(float *const BQ_R__ *const BQ_R__ out,
                     int outcount,
-                    const float *const R__ *const R__ in,
+                    const float *const BQ_R__ *const BQ_R__ in,
                     int incount,
                     double ratio,
                     bool final)
 {
-    Profiler profiler("Resampler::resample");
     return d->resample(out, outcount, in, incount, ratio, final);
 }
 
 int 
-Resampler::resampleInterleaved(float *const R__ out,
+Resampler::resampleInterleaved(float *const BQ_R__ out,
                                int outcount,
-                               const float *const R__ in,
+                               const float *const BQ_R__ in,
                                int incount,
                                double ratio,
                                bool final)
 {
-    Profiler profiler("Resampler::resampleInterleaved");
     return d->resampleInterleaved(out, outcount, in, incount, ratio, final);
 }
 
diff --git a/src/dsp/Resampler.h b/src/dsp/Resampler.h
index 4a1f723..1bb28a8 100644
--- a/src/dsp/Resampler.h
+++ b/src/dsp/Resampler.h
@@ -1,4 +1,4 @@
-/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
+//* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
 
 /*
     Rubber Band Library
@@ -32,6 +32,9 @@ class Resampler
 {
 public:
     enum Quality { Best, FastestTolerable, Fastest };
+    enum Dynamism { RatioOftenChanging, RatioMostlyFixed };
+    enum RatioChange { SmoothRatioChange, SuddenRatioChange };
+    
     enum Exception { ImplementationError };
 
     struct Parameters {
@@ -41,6 +44,22 @@ public:
          */
         Quality quality;
 
+        /**
+         * Performance hint indicating whether the ratio is expected
+         * to change regularly or not. If not, more work may happen on
+         * ratio changes to reduce work when ratio is unchanged.
+         */
+        Dynamism dynamism; 
+
+        /**
+         * Hint indicating whether to smooth transitions, via filter
+         * interpolation or some such method, at ratio change
+         * boundaries, or whether to make a precise switch to the new
+         * ratio without regard to audible artifacts. The actual
+         * effect of this depends on the implementation in use.
+         */
+        RatioChange ratioChange;
+        
         /** 
          * Rate of expected input prior to resampling: may be used to
          * determine the filter bandwidth for the quality setting. If
@@ -67,6 +86,8 @@ public:
 
         Parameters() :
             quality(FastestTolerable),
+            dynamism(RatioMostlyFixed),
+            ratioChange(SmoothRatioChange),
             initialSampleRate(44100),
             maxBufferSize(0),
             debugLevel(0) { }
diff --git a/src/system/Allocators.h b/src/system/Allocators.h
index 355bf29..1536954 100644
--- a/src/system/Allocators.h
+++ b/src/system/Allocators.h
@@ -29,6 +29,8 @@
 #include <new> // for std::bad_alloc
 #include <stdlib.h>
 
+#include <stdexcept>
+
 #ifndef HAVE_POSIX_MEMALIGN
 #ifndef _WIN32
 #ifndef __APPLE__
@@ -309,6 +311,96 @@ private:
     T *m_t;
 };
 
+/** Allocator for use with STL classes, e.g. vector, to ensure
+ *  alignment.  Based on example code by Stephan T. Lavavej.
+ *
+ *  e.g. std::vector<float, StlAllocator<float> > v;
+ */
+template <typename T>
+class StlAllocator
+{
+public:
+    typedef T *pointer;
+    typedef const T *const_pointer;
+    typedef T &reference;
+    typedef const T &const_reference;
+    typedef T value_type;
+    typedef size_t size_type;
+    typedef ptrdiff_t difference_type;
+
+    StlAllocator() { }
+    StlAllocator(const StlAllocator&) { }
+    template <typename U> StlAllocator(const StlAllocator<U>&) { }
+    ~StlAllocator() { }
+
+    T *
+    allocate(const size_t n) const {
+        if (n == 0) return 0;
+        if (n > max_size()) {
+#ifndef NO_EXCEPTIONS
+            throw std::length_error("Size overflow in StlAllocator::allocate()");
+#else
+            abort();
+#endif
+        }
+        return ::RubberBand::allocate<T>(n);
+    }
+
+    void
+    deallocate(T *const p, const size_t) const {
+        ::RubberBand::deallocate(p);
+    }
+
+    template <typename U>
+    T *
+    allocate(const size_t n, const U *) const {
+        return allocate(n);
+    }
+
+    T *
+    address(T &r) const {
+        return &r;
+    }
+
+    const T *
+    address(const T &s) const {
+        return &s;
+    }
+
+    size_t
+    max_size() const {
+        return (static_cast<size_t>(0) - static_cast<size_t>(1)) / sizeof(T);
+    }
+
+    template <typename U> struct rebind {
+        typedef StlAllocator<U> other;
+    };
+    
+    bool
+    operator==(const StlAllocator &) const {
+        return true;
+    }
+
+    bool
+    operator!=(const StlAllocator &) const {
+        return false;
+    }
+
+    void
+    construct(T *const p, const T &t) const {
+        void *const pv = static_cast<void *>(p);
+        new (pv) T(t);
+    }
+
+    void
+    destroy(T *const p) const {
+        p->~T();
+    }
+
+private:
+    StlAllocator& operator=(const StlAllocator&);
+};
+
 }
 
 #endif
diff --git a/src/system/VectorOps.h b/src/system/VectorOps.h
index cb58498..5287be7 100644
--- a/src/system/VectorOps.h
+++ b/src/system/VectorOps.h
@@ -366,32 +366,32 @@ inline void v_scale(double *const R__ dst,
 }
 #endif
 
-template<typename T>
-inline void v_multiply(T *const R__ dst,
-                       const T *const R__ src,
+template<typename T, typename S>
+inline void v_multiply(T *const R__ srcdst,
+                       const S *const R__ src,
                        const int count)
 {
     for (int i = 0; i < count; ++i) {
-        dst[i] *= src[i];
+        srcdst[i] *= src[i];
     }
 }
 
 #if defined HAVE_IPP 
 template<>
-inline void v_multiply(float *const R__ dst,
+inline void v_multiply(float *const R__ srcdst,
                        const float *const R__ src,
                        const int count)
 {
-    ippsMul_32f_I(src, dst, count);
+    ippsMul_32f_I(src, srcdst, count);
 }
 template<>
-inline void v_multiply(double *const R__ dst,
+inline void v_multiply(double *const R__ srcdst,
                        const double *const R__ src,
                        const int count)
 {
-    ippsMul_64f_I(src, dst, count);
+    ippsMul_64f_I(src, srcdst, count);
 }
-#endif
+#endif // HAVE_IPP
 
 template<typename T>
 inline void v_multiply(T *const R__ dst,
@@ -491,6 +491,58 @@ inline T v_sum(const T *const R__ src,
     return result;
 }
 
+template<typename T>
+inline T v_multiply_and_sum(const T *const R__ src1,
+                            const T *const R__ src2,
+                            const int count)
+{
+    T result = T();
+    for (int i = 0; i < count; ++i) {
+        result += src1[i] * src2[i];
+    }
+    return result;
+}
+
+#if defined HAVE_IPP
+template<>
+inline float v_multiply_and_sum(const float *const R__ src1,
+                                const float *const R__ src2,
+                                const int count)
+{
+    float dp;
+    ippsDotProd_32f(src1, src2, count, &dp);
+    return dp;
+}
+template<>
+inline double v_multiply_and_sum(const double *const R__ src1,
+                                 const double *const R__ src2,
+                                 const int count)
+{
+    double dp;
+    ippsDotProd_64f(src1, src2, count, &dp);
+    return dp;
+}
+#elif defined HAVE_VDSP
+template<>
+inline float v_multiply_and_sum(const float *const R__ src1,
+                                const float *const R__ src2,
+                                const int count)
+{
+    float dp;
+    vDSP_dotpr(src1, 1, src2, 1, &dp, count);
+    return dp;
+}
+template<>
+inline double v_multiply_and_sum(const double *const R__ src1,
+                                 const double *const R__ src2,
+                                 const int count)
+{
+    double dp;
+    vDSP_dotprD(src1, 1, src2, 1, &dp, count);
+    return dp;
+}
+#endif
+
 template<typename T>
 inline void v_log(T *const R__ dst,
                   const int count)