Update to new combined build

2012-09-09 16:57:42 +01:00
parent 4ecb1fa6f1
commit 93c38b50a0
77 changed files with 10427 additions and 897 deletions
--- a/src/RubberBandStretcher.cpp
+++ b/src/RubberBandStretcher.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "StretcherImpl.h"
--- a/src/StretchCalculator.cpp
+++ b/src/StretchCalculator.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "StretchCalculator.h"
--- a/src/StretchCalculator.h
+++ b/src/StretchCalculator.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_STRETCH_CALCULATOR_H_
--- a/src/StretcherChannelData.cpp
+++ b/src/StretcherChannelData.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "StretcherChannelData.h"
--- a/src/StretcherChannelData.h
+++ b/src/StretcherChannelData.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_STRETCHERCHANNELDATA_H_
--- a/src/StretcherImpl.cpp
+++ b/src/StretcherImpl.cpp
@@ -1,25 +1,35 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "StretcherImpl.h"

-#include "dsp/PercussiveAudioCurve.h"
-#include "dsp/HighFrequencyAudioCurve.h"
-#include "dsp/SpectralDifferenceAudioCurve.h"
-#include "dsp/SilentAudioCurve.h"
-#include "dsp/ConstantAudioCurve.h"
-#include "dsp/CompoundAudioCurve.h"
+#include "audiocurves/PercussiveAudioCurve.h"
+#include "audiocurves/HighFrequencyAudioCurve.h"
+#include "audiocurves/SpectralDifferenceAudioCurve.h"
+#include "audiocurves/SilentAudioCurve.h"
+#include "audiocurves/ConstantAudioCurve.h"
+#include "audiocurves/CompoundAudioCurve.h"
+
 #include "dsp/Resampler.h"

 #include "StretchCalculator.h"
@@ -75,7 +85,9 @@ RubberBandStretcher::Impl::Impl(size_t sampleRate,
    m_outbufSize(m_defaultFftSize * 2),
    m_maxProcessSize(m_defaultFftSize),
    m_expectedInputDuration(0),
+#ifndef NO_THREADING
    m_threaded(false),
+#endif
    m_realtime(false),
    m_options(options),
    m_debugLevel(m_defaultDebugLevel),
@@ -84,7 +96,9 @@ RubberBandStretcher::Impl::Impl(size_t sampleRate,
    m_afilter(0),
    m_swindow(0),
    m_studyFFT(0),
+#ifndef NO_THREADING
    m_spaceAvailable("space"),
+#endif
    m_inputDuration(0),
    m_detectorType(CompoundAudioCurve::CompoundDetector),
    m_silentHistory(0),
@@ -145,6 +159,7 @@ RubberBandStretcher::Impl::Impl(size_t sampleRate,
        }
    }

+#ifndef NO_THREADING
    if (m_channels > 1) {

        m_threaded = true;
@@ -162,12 +177,14 @@ RubberBandStretcher::Impl::Impl(size_t sampleRate,
            cerr << "Going multithreaded..." << endl;
        }
    }
+#endif

    configure();
 }

 RubberBandStretcher::Impl::~Impl()
 {
+#ifndef NO_THREADING
    if (m_threaded) {
        MutexLocker locker(&m_threadSetMutex);
        for (set<ProcessThread *>::iterator i = m_threadSet.begin();
@@ -180,6 +197,7 @@ RubberBandStretcher::Impl::~Impl()
            delete *i;
        }
    }
+#endif

    for (size_t c = 0; c < m_channels; ++c) {
        delete m_channelData[c];
@@ -204,6 +222,7 @@ RubberBandStretcher::Impl::~Impl()
 void
 RubberBandStretcher::Impl::reset()
 {
+#ifndef NO_THREADING
    if (m_threaded) {
        m_threadSetMutex.lock();
        for (set<ProcessThread *>::iterator i = m_threadSet.begin();
@@ -217,6 +236,7 @@ RubberBandStretcher::Impl::reset()
        }
        m_threadSet.clear();
    }
+#endif

    m_emergencyScavenger.scavenge();

@@ -235,7 +255,9 @@ RubberBandStretcher::Impl::reset()
    m_inputDuration = 0;
    m_silentHistory = 0;

+#ifndef NO_THREADING
    if (m_threaded) m_threadSetMutex.unlock();
+#endif

    reconfigure();
 }
@@ -534,6 +556,7 @@ RubberBandStretcher::Impl::calculateSizes()
        // the pitch scale changes
        m_outbufSize = m_outbufSize * 16;
    } else {
+#ifndef NO_THREADING
        if (m_threaded) {
            // This headroom is to permit the processing threads to
            // run ahead of the buffer output drainage; the exact
@@ -541,6 +564,7 @@ RubberBandStretcher::Impl::calculateSizes()
            // results
            m_outbufSize = m_outbufSize * 16;
        }
+#endif
    }

    if (m_debugLevel > 0) {
@@ -1220,6 +1244,7 @@ RubberBandStretcher::Impl::process(const float *const *input, size_t samples, bo
            }
        }

+#ifndef NO_THREADING
        if (m_threaded) {
            MutexLocker locker(&m_threadSetMutex);

@@ -1233,6 +1258,7 @@ RubberBandStretcher::Impl::process(const float *const *input, size_t samples, bo
                cerr << m_channels << " threads created" << endl;
            }
        }
+#endif
        
        m_mode = Processing;
    }
@@ -1270,7 +1296,9 @@ RubberBandStretcher::Impl::process(const float *const *input, size_t samples, bo
 //                cerr << "process: happy with channel " << c << endl;
            }
            if (
+#ifndef NO_THREADING
                !m_threaded &&
+#endif
                !m_realtime) {
                bool any = false, last = false;
                processChunks(c, any, last);
@@ -1284,6 +1312,7 @@ RubberBandStretcher::Impl::process(const float *const *input, size_t samples, bo
            // the realtime onset detector
            processOneChunk();
        }
+#ifndef NO_THREADING
        if (m_threaded) {
            for (ThreadSet::iterator i = m_threadSet.begin();
                 i != m_threadSet.end(); ++i) {
@@ -1295,6 +1324,7 @@ RubberBandStretcher::Impl::process(const float *const *input, size_t samples, bo
            }
            m_spaceAvailable.unlock();
        }
+#endif

        if (m_debugLevel > 2) {
            if (!allConsumed) cerr << "process looping" << endl;
--- a/src/StretcherImpl.h
+++ b/src/StretcherImpl.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_STRETCHERIMPL_H_
@@ -20,7 +29,8 @@
 #include "dsp/Window.h"
 #include "dsp/SincWindow.h"
 #include "dsp/FFT.h"
-#include "dsp/CompoundAudioCurve.h"
+
+#include "audiocurves/CompoundAudioCurve.h"

 #include "base/RingBuffer.h"
 #include "base/Scavenger.h"
@@ -34,7 +44,11 @@ using namespace RubberBand;
 namespace RubberBand
 {

+#ifdef PROCESS_SAMPLE_TYPE
+typedef PROCESS_SAMPLE_TYPE process_t;
+#else
 typedef double process_t;
+#endif

 class AudioCurveCalculator;
 class StretchCalculator;
@@ -161,7 +175,9 @@ protected:
    size_t m_maxProcessSize;
    size_t m_expectedInputDuration;

+#ifndef NO_THREADING    
    bool m_threaded;
+#endif

    bool m_realtime;
    Options m_options;
@@ -183,6 +199,7 @@ protected:
    Window<float> *m_swindow;
    FFT *m_studyFFT;

+#ifndef NO_THREADING
    Condition m_spaceAvailable;
    
    class ProcessThread : public Thread
@@ -203,6 +220,13 @@ protected:
    typedef std::set<ProcessThread *> ThreadSet;
    ThreadSet m_threadSet;
    
+#if defined HAVE_IPP && !defined USE_SPEEX
+    // Exasperatingly, the IPP polyphase resampler does not appear to
+    // be thread-safe as advertised -- a good reason to prefer the
+    // Speex alternative
+    Mutex m_resamplerMutex;
+#endif
+#endif

    size_t m_inputDuration;
    CompoundAudioCurve::Type m_detectorType;
--- a/src/StretcherProcess.cpp
+++ b/src/StretcherProcess.cpp
@@ -1,22 +1,31 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "StretcherImpl.h"

-#include "dsp/PercussiveAudioCurve.h"
-#include "dsp/HighFrequencyAudioCurve.h"
-#include "dsp/ConstantAudioCurve.h"
+#include "audiocurves/PercussiveAudioCurve.h"
+#include "audiocurves/HighFrequencyAudioCurve.h"
+#include "audiocurves/ConstantAudioCurve.h"

 #include "StretchCalculator.h"
 #include "StretcherChannelData.h"
@@ -42,6 +51,7 @@ using std::endl;

 namespace RubberBand {

+#ifndef NO_THREADING

 RubberBandStretcher::Impl::ProcessThread::ProcessThread(Impl *s, size_t c) :
    m_s(s),
@@ -117,6 +127,7 @@ RubberBandStretcher::Impl::ProcessThread::abandon()
    m_abandoning = true;
 }

+#endif

 bool
 RubberBandStretcher::Impl::resampleBeforeStretching() const
@@ -194,6 +205,13 @@ RubberBandStretcher::Impl::consumeChannel(size_t c,
            cd.setResampleBufSize(reqSize);
        }

+#ifndef NO_THREADING
+#if defined HAVE_IPP && !defined USE_SPEEX
+        if (m_threaded) {
+            m_resamplerMutex.lock();
+        }
+#endif
+#endif

        if (useMidSide) {
            ms = (float *)alloca(samples * sizeof(float));
@@ -209,6 +227,13 @@ RubberBandStretcher::Impl::consumeChannel(size_t c,
                                         1.0 / m_pitchScale,
                                         final);

+#ifndef NO_THREADING
+#if defined HAVE_IPP && !defined USE_SPEEX
+        if (m_threaded) {
+            m_resamplerMutex.unlock();
+        }
+#endif
+#endif
    }

    if (writable < toWrite) {
@@ -373,14 +398,18 @@ RubberBandStretcher::Impl::testInbufReadSpace(size_t c)
            // its input -- and that would give incorrect output, as
            // we know there is more input to come.

+#ifndef NO_THREADING
            if (!m_threaded) {
+#endif
                if (m_debugLevel > 1) {
                    cerr << "WARNING: RubberBandStretcher: read space < chunk size ("
                         << inbuf.getReadSpace() << " < " << m_aWindowSize
                         << ") when not all input written, on processChunks for channel " << c << endl;
                }

+#ifndef NO_THREADING
            }
+#endif
            return false;
        }
        
@@ -956,11 +985,13 @@ RubberBandStretcher::Impl::synthesiseChunk(size_t channel,

    if (!cd.unchanged) {

-        cd.fft->inversePolar(cd.mag, cd.phase, cd.dblbuf);
-
-        // our ffts produced unscaled results
+        // Our FFTs produced unscaled results. Scale before inverse
+        // transform rather than after, to avoid overflow if using a
+        // fixed-point FFT.
        float factor = 1.f / fsz;
-        v_scale(dblbuf, factor, fsz);
+        v_scale(cd.mag, factor, hs + 1);
+
+        cd.fft->inversePolar(cd.mag, cd.phase, cd.dblbuf);

        if (wsz == fsz) {
            v_convert(fltbuf, dblbuf + hs, hs);
@@ -1044,6 +1075,13 @@ RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, boo
            cd.setResampleBufSize(reqSize);
        }

+#ifndef NO_THREADING
+#if defined HAVE_IPP && !defined USE_SPEEX
+        if (m_threaded) {
+            m_resamplerMutex.lock();
+        }
+#endif
+#endif

        size_t outframes = cd.resampler->resample(&cd.accumulator,
                                                  &cd.resamplebuf,
@@ -1051,6 +1089,13 @@ RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, boo
                                                  1.0 / m_pitchScale,
                                                  last);

+#ifndef NO_THREADING
+#if defined HAVE_IPP && !defined USE_SPEEX
+        if (m_threaded) {
+            m_resamplerMutex.unlock();
+        }
+#endif
+#endif

        writeOutput(*cd.outbuf, cd.resamplebuf,
                    outframes, cd.outCount, theoreticalOut);
@@ -1158,14 +1203,18 @@ RubberBandStretcher::Impl::available() const
 {
    Profiler profiler("RubberBandStretcher::Impl::available");

+#ifndef NO_THREADING
    if (m_threaded) {
        MutexLocker locker(&m_threadSetMutex);
        if (m_channelData.empty()) return 0;
    } else {
        if (m_channelData.empty()) return 0;
    }
+#endif

+#ifndef NO_THREADING
    if (!m_threaded) {
+#endif
        for (size_t c = 0; c < m_channels; ++c) {
            if (m_channelData[c]->inputSize >= 0) {
 //                cerr << "available: m_done true" << endl;
@@ -1180,7 +1229,9 @@ RubberBandStretcher::Impl::available() const
                }
            }
        }
+#ifndef NO_THREADING
    }
+#endif

    size_t min = 0;
    bool consumed = true;
--- a/src/audiocurves/CompoundAudioCurve.cpp
+++ b/src/audiocurves/CompoundAudioCurve.cpp
@@ -1,20 +1,29 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "CompoundAudioCurve.h"

-#include "MovingMedian.h"
+#include "dsp/MovingMedian.h"

 #include <iostream>

--- a/src/audiocurves/CompoundAudioCurve.h
+++ b/src/audiocurves/CompoundAudioCurve.h
@@ -1,24 +1,33 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _COMPOUND_AUDIO_CURVE_H_
 #define _COMPOUND_AUDIO_CURVE_H_

-#include "AudioCurveCalculator.h"
+#include "dsp/AudioCurveCalculator.h"
 #include "PercussiveAudioCurve.h"
 #include "HighFrequencyAudioCurve.h"
-#include "SampleFilter.h"
+#include "dsp/SampleFilter.h"

 namespace RubberBand
 {
--- a/src/audiocurves/ConstantAudioCurve.cpp
+++ b/src/audiocurves/ConstantAudioCurve.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "ConstantAudioCurve.h"
--- a/src/audiocurves/ConstantAudioCurve.h
+++ b/src/audiocurves/ConstantAudioCurve.h
@@ -1,21 +1,30 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _CONSTANT_AUDIO_CURVE_H_
 #define _CONSTANT_AUDIO_CURVE_H_

-#include "AudioCurveCalculator.h"
+#include "dsp/AudioCurveCalculator.h"

 namespace RubberBand
 {
--- a/src/audiocurves/HighFrequencyAudioCurve.cpp
+++ b/src/audiocurves/HighFrequencyAudioCurve.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "HighFrequencyAudioCurve.h"
--- a/src/audiocurves/HighFrequencyAudioCurve.h
+++ b/src/audiocurves/HighFrequencyAudioCurve.h
@@ -1,21 +1,30 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _HIGHFREQUENCY_AUDIO_CURVE_H_
 #define _HIGHFREQUENCY_AUDIO_CURVE_H_

-#include "AudioCurveCalculator.h"
+#include "dsp/AudioCurveCalculator.h"

 namespace RubberBand
 {
--- a/src/audiocurves/PercussiveAudioCurve.cpp
+++ b/src/audiocurves/PercussiveAudioCurve.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "PercussiveAudioCurve.h"
--- a/src/audiocurves/PercussiveAudioCurve.h
+++ b/src/audiocurves/PercussiveAudioCurve.h
@@ -1,21 +1,30 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _PERCUSSIVE_AUDIO_CURVE_H_
 #define _PERCUSSIVE_AUDIO_CURVE_H_

-#include "AudioCurveCalculator.h"
+#include "dsp/AudioCurveCalculator.h"

 namespace RubberBand
 {
--- a/src/audiocurves/SilentAudioCurve.cpp
+++ b/src/audiocurves/SilentAudioCurve.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "SilentAudioCurve.h"
--- a/src/audiocurves/SilentAudioCurve.h
+++ b/src/audiocurves/SilentAudioCurve.h
@@ -1,21 +1,30 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _SILENT_AUDIO_CURVE_H_
 #define _SILENT_AUDIO_CURVE_H_

-#include "AudioCurveCalculator.h"
+#include "dsp/AudioCurveCalculator.h"

 namespace RubberBand
 {
--- a/src/audiocurves/SpectralDifferenceAudioCurve.cpp
+++ b/src/audiocurves/SpectralDifferenceAudioCurve.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "SpectralDifferenceAudioCurve.h"
--- a/src/audiocurves/SpectralDifferenceAudioCurve.h
+++ b/src/audiocurves/SpectralDifferenceAudioCurve.h
@@ -1,22 +1,31 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _SPECTRALDIFFERENCE_AUDIO_CURVE_H_
 #define _SPECTRALDIFFERENCE_AUDIO_CURVE_H_

-#include "AudioCurveCalculator.h"
-#include "Window.h"
+#include "dsp/AudioCurveCalculator.h"
+#include "dsp/Window.h"

 namespace RubberBand
 {
--- a/src/base/Profiler.cpp
+++ b/src/base/Profiler.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "Profiler.h"
@@ -19,7 +28,12 @@
 #include <string>
 #include <map>

-#include <cstdio>
+#include <stdio.h>
+
+#ifdef __MSVC__
+// Ugh --cc
+#define snprintf sprintf_s
+#endif

 namespace RubberBand {

@@ -53,39 +67,23 @@ Profiler::add(const char *id, float ms)
 void
 Profiler::dump()
 {
+    std::string report = getReport();
+    fprintf(stderr, "%s", report.c_str());
+}
+
+std::string
+Profiler::getReport()
+{
+    static const int buflen = 256;
+    char buffer[buflen];
+    std::string report;
+
 #ifdef PROFILE_CLOCKS
-    fprintf(stderr, "Profiling points [CPU time]:\n");
+    snprintf(buffer, buflen, "Profiling points [CPU time]:\n");
 #else
-    fprintf(stderr, "Profiling points [Wall time]:\n");
+    snprintf(buffer, buflen, "Profiling points [Wall time]:\n");
 #endif
-
-    fprintf(stderr, "\nBy name:\n");
-
-    typedef std::set<const char *, std::less<std::string> > StringSet;
-
-    StringSet profileNames;
-    for (ProfileMap::const_iterator i = m_profiles.begin();
-         i != m_profiles.end(); ++i) {
-        profileNames.insert(i->first);
-    }
-
-    for (StringSet::const_iterator i = profileNames.begin();
-         i != profileNames.end(); ++i) {
-
-        ProfileMap::const_iterator j = m_profiles.find(*i);
-        if (j == m_profiles.end()) continue;
-
-        const TimePair &pp(j->second);
-        fprintf(stderr, "%s(%d):\n", *i, pp.first);
-        fprintf(stderr, "\tReal: \t%f ms      \t[%f ms total]\n",
-                (pp.second / pp.first),
-                (pp.second));
-
-        WorstCallMap::const_iterator k = m_worstCalls.find(*i);
-        if (k == m_worstCalls.end()) continue;
-        
-        fprintf(stderr, "\tWorst:\t%f ms/call\n", k->second);
-    }
+    report += buffer;

    typedef std::multimap<float, const char *> TimeRMap;
    typedef std::multimap<int, const char *> IntRMap;
@@ -105,29 +103,71 @@ Profiler::dump()
        worstmap.insert(TimeRMap::value_type(i->second, i->first));
    }

-    fprintf(stderr, "\nBy total:\n");
+    snprintf(buffer, buflen, "\nBy total:\n");
+    report += buffer;
    for (TimeRMap::const_iterator i = totmap.end(); i != totmap.begin(); ) {
        --i;
-        fprintf(stderr, "%-40s  %f ms\n", i->second, i->first);
+        snprintf(buffer, buflen, "%-40s  %f ms\n", i->second, i->first);
+        report += buffer;
    }

-    fprintf(stderr, "\nBy average:\n");
+    snprintf(buffer, buflen, "\nBy average:\n");
+    report += buffer;
    for (TimeRMap::const_iterator i = avgmap.end(); i != avgmap.begin(); ) {
        --i;
-        fprintf(stderr, "%-40s  %f ms\n", i->second, i->first);
+        snprintf(buffer, buflen, "%-40s  %f ms\n", i->second, i->first);
+        report += buffer;
    }

-    fprintf(stderr, "\nBy worst case:\n");
+    snprintf(buffer, buflen, "\nBy worst case:\n");
+    report += buffer;
    for (TimeRMap::const_iterator i = worstmap.end(); i != worstmap.begin(); ) {
        --i;
-        fprintf(stderr, "%-40s  %f ms\n", i->second, i->first);
+        snprintf(buffer, buflen, "%-40s  %f ms\n", i->second, i->first);
+        report += buffer;
    }

-    fprintf(stderr, "\nBy number of calls:\n");
+    snprintf(buffer, buflen, "\nBy number of calls:\n");
+    report += buffer;
    for (IntRMap::const_iterator i = ncallmap.end(); i != ncallmap.begin(); ) {
        --i;
-        fprintf(stderr, "%-40s  %d\n", i->second, i->first);
+        snprintf(buffer, buflen, "%-40s  %d\n", i->second, i->first);
+        report += buffer;
    }
+
+    snprintf(buffer, buflen, "\nBy name:\n");
+    report += buffer;
+
+    typedef std::set<const char *, std::less<std::string> > StringSet;
+
+    StringSet profileNames;
+    for (ProfileMap::const_iterator i = m_profiles.begin();
+         i != m_profiles.end(); ++i) {
+        profileNames.insert(i->first);
+    }
+
+    for (StringSet::const_iterator i = profileNames.begin();
+         i != profileNames.end(); ++i) {
+
+        ProfileMap::const_iterator j = m_profiles.find(*i);
+        if (j == m_profiles.end()) continue;
+
+        const TimePair &pp(j->second);
+        snprintf(buffer, buflen, "%s(%d):\n", *i, pp.first);
+        report += buffer;
+        snprintf(buffer, buflen, "\tReal: \t%f ms      \t[%f ms total]\n",
+                (pp.second / pp.first),
+                (pp.second));
+        report += buffer;
+
+        WorstCallMap::const_iterator k = m_worstCalls.find(*i);
+        if (k == m_worstCalls.end()) continue;
+        
+        snprintf(buffer, buflen, "\tWorst:\t%f ms/call\n", k->second);
+        report += buffer;
+    }
+
+    return report;
 }

 Profiler::Profiler(const char* c) :
--- a/src/base/Profiler.h
+++ b/src/base/Profiler.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _PROFILER_H_
@@ -40,7 +49,10 @@
 #endif
 #endif

+#ifndef NO_TIMING
 #include <map>
+#include <string>
+#endif

 namespace RubberBand {

@@ -56,6 +68,12 @@ public:

    static void dump();

+    // Unlike the other functions, this is only defined if NO_TIMING
+    // is not set (because it uses std::string which is otherwise
+    // unused here). So, treat this as a tricksy internal function
+    // rather than an API call and guard any call to it appropriately.
+    static std::string getReport();
+
 protected:
    const char* m_c;
 #ifdef PROFILE_CLOCKS
--- a/src/base/RingBuffer.h
+++ b/src/base/RingBuffer.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_RINGBUFFER_H_
@@ -47,7 +56,7 @@ public:
     * power of two, this means n should ideally be some power of two
     * minus one.
     */
-    RingBuffer(int n = 0);
+    RingBuffer(int n);

    virtual ~RingBuffer();

@@ -268,8 +277,7 @@ RingBuffer<T>::reset()
    std::cerr << "RingBuffer<T>[" << this << "]::reset" << std::endl;
 #endif

-    m_writer = 0;
-    m_reader = 0;
+    m_reader = m_writer;
 }

 template <typename T>
@@ -298,7 +306,7 @@ RingBuffer<T>::read(S *const R__ destination, int n)
    if (n > available) {
 	std::cerr << "WARNING: RingBuffer::read: " << n << " requested, only "
                  << available << " available" << std::endl;
-        v_zero(destination + available, n - available);
+//!!!        v_zero(destination + available, n - available);
 	n = available;
    }
    if (n == 0) return n;
@@ -367,7 +375,7 @@ RingBuffer<T>::readOne()
    if (w == r) {
 	std::cerr << "WARNING: RingBuffer::readOne: no sample available"
 		  << std::endl;
-	return 0;
+	return T();
    }

    T value = m_buffer[r];
--- a/src/base/Scavenger.h
+++ b/src/base/Scavenger.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_SCAVENGER_H_
--- a/src/dsp/AudioCurveCalculator.cpp
+++ b/src/dsp/AudioCurveCalculator.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "AudioCurveCalculator.h"
--- a/src/dsp/AudioCurveCalculator.h
+++ b/src/dsp/AudioCurveCalculator.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _AUDIO_CURVE_CALCULATOR_H_
@@ -93,6 +102,13 @@ public:
     */
    virtual double processDouble(const double *R__ mag, int increment) = 0;

+    /**
+     * Obtain a confidence for the curve value (if applicable): 1.0
+     * indicates perfect confidence in the curve calculation, 0.0
+     * none. This base implementation always returns 1.0.
+     */
+    virtual double getConfidence() const { return 1.0; }
+
    /**
     * Reset the calculator, forgetting the history of the audio input
     * so far.
--- a/src/dsp/FFT.cpp
+++ b/src/dsp/FFT.cpp
--- a/src/dsp/FFT.h
+++ b/src/dsp/FFT.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_FFT_H_
@@ -17,6 +26,9 @@

 #include "system/sysutils.h"

+#include <string>
+#include <set>
+
 namespace RubberBand {

 class FFTImpl;
@@ -41,7 +53,7 @@ class FFTImpl;
 class FFT
 {
 public:
-    enum Exception { InvalidSize };
+    enum Exception { InvalidSize, InvalidImplementation, InternalError };

    FFT(int size, int debugLevel = 0); // may throw InvalidSize
    ~FFT();
@@ -73,11 +85,36 @@ public:
    void initFloat();
    void initDouble();

-    static void tune();
+    enum Precision {
+        SinglePrecision = 0x1,
+        DoublePrecision = 0x2
+    };
+    typedef int Precisions;
+
+    /**
+     * Return the OR of all precisions supported by this
+     * implementation. All of the functions (float and double) are
+     * available regardless of the supported implementations, but they
+     * will be calculated at the proper precision only if it is
+     * available. (So float functions will be calculated using doubles
+     * and then truncated if single-precision is unavailable, and
+     * double functions will use single-precision arithmetic if double
+     * is unavailable.)
+     */
+    Precisions getSupportedPrecisions() const;
+
+    static std::set<std::string> getImplementations();
+    static std::string getDefaultImplementation();
+    static void setDefaultImplementation(std::string);
+
+#ifdef FFT_MEASUREMENT
+    static std::string tune();
+#endif

 protected:
    FFTImpl *d;
-    static int m_method;
+    static std::string m_implementation;
+    static void pickDefaultImplementation();
 };

 }
--- a/src/dsp/MovingMedian.h
+++ b/src/dsp/MovingMedian.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _MOVING_MEDIAN_H_
--- a/src/dsp/Resampler.cpp
+++ b/src/dsp/Resampler.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "Resampler.h"
@@ -22,6 +31,11 @@

 #include "system/Allocators.h"

+#ifdef HAVE_IPP
+#include <ipps.h>
+#include <ippsr.h>
+#include <ippac.h>
+#endif

 #ifdef HAVE_LIBSAMPLERATE
 #include <samplerate.h>
@@ -31,12 +45,19 @@
 #include <libresample.h>
 #endif

+#ifdef USE_SPEEX
+#include "speex/speex_resampler.h"
+#endif

+#ifndef HAVE_IPP
 #ifndef HAVE_LIBSAMPLERATE
 #ifndef HAVE_LIBRESAMPLE
+#ifndef USE_SPEEX
 #error No resampler implementation selected!
 #endif
 #endif
+#endif
+#endif

 namespace RubberBand {

@@ -64,6 +85,360 @@ public:

 namespace Resamplers {

+#ifdef HAVE_IPP
+
+class D_IPP : public ResamplerImpl // backend using IPP polyphase resampling
+{
+public:
+    D_IPP(Resampler::Quality quality, int channels, int maxBufferSize,
+          int debugLevel);
+    ~D_IPP();
+    // Resample incount frames per channel from in[c] to out[c].
+    int resample(const float *const R__ *const R__ in,
+                 float *const R__ *const R__ out,
+                 int incount,
+                 float ratio,
+                 bool final);
+    // As resample(), but in and out hold interleaved frames.
+    int resampleInterleaved(const float *const R__ in,
+                            float *const R__ out,
+                            int incount,
+                            float ratio,
+                            bool final = false);
+
+    int getChannelCount() const { return m_channels; }
+
+    void reset();
+
+protected:
+    IppsResamplingPolyphase_32f **m_state; // one IPP state per channel
+    float **m_inbuf;   // per-channel input staging buffers
+    size_t m_inbufsz;  // allocated length of each m_inbuf[c]
+    float **m_outbuf;  // per-channel output staging buffers
+    size_t m_outbufsz; // allocated length of each m_outbuf[c]
+    int m_bufsize;     // working size from which the buffer lengths derive
+    int m_channels;
+    int m_window;      // filter window length, chosen from the quality
+    float m_factor;    // largest ratio allowed for so far (starts at 8)
+    int m_history;     // frames of history kept, from m_window and m_factor
+    int *m_lastread;   // per channel: write position in m_inbuf[c]
+    double *m_time;    // per channel: time value handed to IPP
+    int m_debugLevel;  // > 0 enables diagnostics on std::cerr
+    
+    void setBufSize(int); // regrow m_inbuf/m_outbuf for a new m_bufsize
+};
+
+// Create one IPP polyphase resampler state per channel, using filter
+// settings selected by quality, and allocate the staging buffers.
+D_IPP::D_IPP(Resampler::Quality quality, int channels, int maxBufferSize,
+             int debugLevel) :
+    m_state(0),
+    m_channels(channels),
+    m_debugLevel(debugLevel)
+{
+    if (m_debugLevel > 0) {
+        std::cerr << "Resampler::Resampler: using IPP implementation"
+                  << std::endl;
+    }
+
+    int nStep;
+    IppHintAlgorithm hint;
+
+    switch (quality) {
+    case Resampler::Best:
+        m_window = 64;
+        nStep = 80;
+        hint = ippAlgHintAccurate;
+        break;
+
+    case Resampler::Fastest:
+        m_window = 24;
+        nStep = 64;
+        hint = ippAlgHintFast;
+        break;
+
+    case Resampler::FastestTolerable:
+    default:
+        // Doubles as the fallback for an unrecognised quality, so that
+        // m_window, nStep and hint are never used uninitialised.
+        m_window = 16; // the original also noted 48 as an alternative
+        nStep = 16;    // the original also noted 8 as an alternative
+        hint = ippAlgHintFast;
+        break;
+    }
+
+    m_factor = 8; // initial upper bound on m_ratio, may be amended later
+    m_history = int(m_window * 0.5 * std::max(1.0, 1.0 / m_factor)) + 1;
+
+    m_state = new IppsResamplingPolyphase_32f *[m_channels];
+    m_lastread = new int[m_channels];
+    m_time = new double[m_channels];
+
+    m_bufsize = maxBufferSize + m_history;
+    if (m_debugLevel > 1) {
+        std::cerr << "bufsize = " << m_bufsize << ", window = " << m_window << ", nStep = " << nStep << ", history = " << m_history << std::endl;
+    }
+
+    for (int c = 0; c < m_channels; ++c) {
+        ippsResamplePolyphaseInitAlloc_32f(&m_state[c],
+                                           float(m_window),
+                                           nStep,
+                                           0.95f,
+                                           9.0f,
+                                           hint);
+        m_lastread[c] = m_history;
+        m_time[c] = m_history;
+    }
+
+    m_inbufsz = m_bufsize + m_history + 2;
+    if (m_debugLevel > 1) {
+        std::cerr << "inbuf allocating " << m_bufsize << " + " << m_history << " + 2 = " << m_inbufsz << std::endl;
+    }
+
+    m_outbufsz = lrintf(ceil((m_bufsize - m_history) * m_factor + 2));
+    if (m_debugLevel > 1) {
+        std::cerr << "outbuf allocating (" << m_bufsize << " - " << m_history << ") * " << m_factor << " + 2 = " << m_outbufsz << std::endl;
+    }
+
+    m_inbuf  = allocate_and_zero_channels<float>(m_channels, m_inbufsz);
+    m_outbuf = allocate_and_zero_channels<float>(m_channels, m_outbufsz);
+
+    if (m_debugLevel > 1) {
+        std::cerr << "Resampler init done" << std::endl;
+    }
+}
+
+// Release the IPP states and every array the constructor allocated.
+D_IPP::~D_IPP()
+{
+    for (int ch = 0; ch < m_channels; ++ch) {
+        ippsResamplePolyphaseFree_32f(m_state[ch]);
+    }
+    delete[] m_state;
+    delete[] m_time;
+    delete[] m_lastread;
+
+    deallocate_channels(m_outbuf, m_channels);
+    deallocate_channels(m_inbuf, m_channels);
+}
+
+// Adopt sz as the new working buffer size and reallocate the
+// per-channel input and output buffers to the matching lengths.
+void
+D_IPP::setBufSize(int sz)
+{
+    const int oldsz = m_bufsize;
+    m_bufsize = sz;
+
+    // Same sizing formulae as the constructor uses.
+    int n1 = m_bufsize + m_history + 2;
+    int n2 = lrintf(ceil((m_bufsize - m_history) * m_factor + 2));
+
+    // All diagnostics are gated on the debug level, so that nothing
+    // is written to stderr during normal processing.
+    if (m_debugLevel > 1) {
+        std::cerr << "resize bufsize " << oldsz << " -> "
+                  << m_bufsize << std::endl;
+        std::cerr << "(outbufsize = " << n2 << ")" << std::endl;
+    }
+
+    m_inbuf = reallocate_and_zero_extend_channels
+        (m_inbuf, m_channels, m_inbufsz, m_channels, n1);
+    m_outbuf = reallocate_and_zero_extend_channels
+        (m_outbuf, m_channels, m_outbufsz, m_channels, n2);
+    m_inbufsz = n1;
+    m_outbufsz = n2;
+}
+
+// Resample incount frames from each in[c] into out[c] at the given
+// ratio; returns outcount, the number of output frames produced.
+// NOTE(review): m_factor can be raised below without the output buffers
+// (whose size depends on m_factor) being regrown -- confirm this is safe.
+int
+D_IPP::resample(const float *const R__ *const R__ in,
+                float *const R__ *const R__ out,
+                int incount,
+                float ratio,
+                bool final)
+{
+    int outcount = 0;
+
+    if (ratio > m_factor) {
+        m_factor = ratio;
+        m_history = int(m_window * 0.5 * std::max(1.0, 1.0 / m_factor)) + 1;
+    }
+    // Grow the buffers first if this input would not fit in any channel.
+    for (int c = 0; c < m_channels; ++c) {
+        if (m_lastread[c] + incount + m_history > m_bufsize) {
+            setBufSize(m_lastread[c] + incount + m_history);
+        }
+    }
+
+    for (int c = 0; c < m_channels; ++c) {
+        // Append the new input after what is already buffered.
+        for (int i = 0; i < incount; ++i) {
+            m_inbuf[c][m_lastread[c] + i] = in[c][i];
+        }
+        m_lastread[c] += incount;
+        
+        ippsResamplePolyphase_32f(m_state[c],
+                                  m_inbuf[c],
+                                  m_lastread[c] - m_history - int(m_time[c]),
+                                  m_outbuf[c],
+                                  ratio,
+                                  0.97f,
+                                  &m_time[c],
+                                  &outcount);
+
+        v_copy(out[c], m_outbuf[c], outcount);
+        // Move the input still needed down to the start of the buffer...
+        ippsMove_32f(m_inbuf[c] + int(m_time[c]) - m_history,
+                     m_inbuf[c],
+                     m_lastread[c] + m_history - int(m_time[c]));
+        // ...and rebase the write position and time by the same offset.
+        m_lastread[c] -= int(m_time[c]) - m_history;
+        m_time[c] -= int(m_time[c]) - m_history;
+
+        if (final) {
+            // Looks like this actually produces too many samples
+            // (additionalcount is a few samples too large).
+
+            // Also, we aren't likely to have enough space in the
+            // output buffer as the caller won't have allowed for
+            // all the samples we're retrieving here.
+
+            // What to do?
+            int additionalcount = 0;
+            // Pad with m_history zeros and resample the remainder.
+            for (int i = 0; i < m_history; ++i) {
+                m_inbuf[c][m_lastread[c] + i] = 0.f;
+            }
+            
+            ippsResamplePolyphase_32f(m_state[c],
+                                      m_inbuf[c],
+                                      m_lastread[c] - int(m_time[c]),
+                                      m_outbuf[c],
+                                      ratio,
+                                      0.97f,
+                                      &m_time[c],
+                                      &additionalcount);
+
+            if (m_debugLevel > 2) {
+                std::cerr << "incount = " << incount << ", outcount = " << outcount << ", additionalcount = " << additionalcount << ", sum " << outcount + additionalcount << ", est space = " << lrintf(ceil(incount * ratio)) <<std::endl;
+            }
+            v_copy(out[c] + outcount, m_outbuf[c], additionalcount);
+            outcount += additionalcount;
+        }
+    }
+    // Clamp every output sample to the range [-1, 1].
+    for (int c = 0; c < m_channels; ++c) {
+        ippsThreshold_32f_I(out[c], outcount, 1.f, ippCmpGreater);
+        ippsThreshold_32f_I(out[c], outcount, -1.f, ippCmpLess);
+    }
+
+    return outcount;
+}
+
+// Resample incount interleaved frames (m_channels samples per frame)
+// from in to out; returns the number of output frames produced.
+// NOTE(review): m_factor can be raised below without m_outbuf being regrown.
+int
+D_IPP::resampleInterleaved(const float *const R__ in,
+                           float *const R__ out,
+                           int incount,
+                           float ratio,
+                           bool final)
+{
+    int outcount = 0;
+
+    if (ratio > m_factor) {
+        m_factor = ratio;
+        m_history = int(m_window * 0.5 * std::max(1.0, 1.0 / m_factor)) + 1;
+    }
+    // Grow the buffers first if this input would not fit in any channel.
+    for (int c = 0; c < m_channels; ++c) {
+        if (m_lastread[c] + incount + m_history > m_bufsize) {
+            setBufSize(m_lastread[c] + incount + m_history);
+        }
+    }
+
+    for (int c = 0; c < m_channels; ++c) {
+        // De-interleave channel c, appending to what is already buffered.
+        for (int i = 0; i < incount; ++i) {
+            m_inbuf[c][m_lastread[c] + i] = in[i * m_channels + c];
+        }
+        m_lastread[c] += incount;
+        
+        ippsResamplePolyphase_32f(m_state[c],
+                                  m_inbuf[c],
+                                  m_lastread[c] - m_history - int(m_time[c]),
+                                  m_outbuf[c],
+                                  ratio,
+                                  0.97f,
+                                  &m_time[c],
+                                  &outcount);
+        // Move the input still needed down to the start of the buffer.
+        ippsMove_32f(m_inbuf[c] + int(m_time[c]) - m_history,
+                     m_inbuf[c],
+                     m_lastread[c] + m_history - int(m_time[c]));
+
+        m_lastread[c] -= int(m_time[c]) - m_history;
+        m_time[c] -= int(m_time[c]) - m_history;
+    }
+    // Interleave the per-channel results into the caller's buffer.
+    v_interleave(out, m_outbuf, m_channels, outcount);
+
+    if (final) {
+        // Looks like this actually produces too many samples
+        // (additionalcount is a few samples too large).
+
+        // Also, we aren't likely to have enough space in the
+        // output buffer as the caller won't have allowed for
+        // all the samples we're retrieving here.
+
+        // What to do?
+        int additionalcount = 0;
+        
+        for (int c = 0; c < m_channels; ++c) {
+            // Pad with m_history zeros and resample the remainder.
+            for (int i = 0; i < m_history; ++i) {
+                m_inbuf[c][m_lastread[c] + i] = 0.f;
+            }
+            
+            ippsResamplePolyphase_32f(m_state[c],
+                                      m_inbuf[c],
+                                      m_lastread[c] - int(m_time[c]),
+                                      m_outbuf[c],
+                                      ratio,
+                                      0.97f,
+                                      &m_time[c],
+                                      &additionalcount);
+
+            if (m_debugLevel > 2) {
+                std::cerr << "incount = " << incount << ", outcount = " << outcount << ", additionalcount = " << additionalcount << ", sum " << outcount + additionalcount << ", est space = " << lrintf(ceil(incount * ratio)) <<std::endl;
+            }
+        }
+
+        v_interleave(out + (outcount * m_channels),
+                     m_outbuf,
+                     m_channels,
+                     additionalcount);
+        outcount += additionalcount;
+    }
+    // Clamp every output sample to the range [-1, 1].
+    ippsThreshold_32f_I(out, outcount * m_channels, 1.f, ippCmpGreater);
+    ippsThreshold_32f_I(out, outcount * m_channels, -1.f, ippCmpLess);
+
+    return outcount;
+}
+
+void
+D_IPP::reset()
+{
+    //!!! Not implemented: currently a no-op, no member is modified here.
+}
+
+#endif /* HAVE_IPP */

 #ifdef HAVE_LIBSAMPLERATE

@@ -126,7 +501,9 @@ D_SRC::D_SRC(Resampler::Quality quality, int channels, int maxBufferSize,
    if (err) {
        std::cerr << "Resampler::Resampler: failed to create libsamplerate resampler: " 
                  << src_strerror(err) << std::endl;
+#ifndef NO_EXCEPTIONS
        throw Resampler::ImplementationError;
+#endif
    }

    if (maxBufferSize > 0 && m_channels > 1) {
@@ -184,7 +561,9 @@ D_SRC::resample(const float *const R__ *const R__ in,
    if (err) {
        std::cerr << "Resampler::process: libsamplerate error: "
                  << src_strerror(err) << std::endl;
+#ifndef NO_EXCEPTIONS
        throw Resampler::ImplementationError;
+#endif
    }

    if (m_channels > 1) {
@@ -220,7 +599,9 @@ D_SRC::resampleInterleaved(const float *const R__ in,
    if (err) {
        std::cerr << "Resampler::process: libsamplerate error: "
                  << src_strerror(err) << std::endl;
+#ifndef NO_EXCEPTIONS
        throw Resampler::ImplementationError;
+#endif
    }

    m_lastRatio = ratio;
@@ -424,6 +805,234 @@ D_Resample::reset()

 #endif /* HAVE_LIBRESAMPLE */

+#ifdef USE_SPEEX
+    
+// Resampler implementation that drives a SpeexResamplerState.  Only
+// compiled when USE_SPEEX is defined.
+class D_Speex : public ResamplerImpl
+{
+public:
+    D_Speex(Resampler::Quality quality, int channels, int maxBufferSize,
+            int debugLevel);
+    ~D_Speex();
+
+    // Resample incount frames from the per-channel buffers in "in"
+    // to the per-channel buffers in "out"; ratio is the output/input
+    // length ratio.  Returns the number of output frames reported by
+    // Speex.
+    int resample(const float *const R__ *const R__ in,
+                 float *const R__ *const R__ out,
+                 int incount,
+                 float ratio,
+                 bool final);
+
+    // As resample(), but "in" and "out" are single interleaved
+    // buffers that are handed to Speex directly.
+    int resampleInterleaved(const float *const R__ in,
+                            float *const R__ out,
+                            int incount,
+                            float ratio,
+                            bool final = false);
+
+    int getChannelCount() const { return m_channels; }
+
+    void reset();
+
+protected:
+    SpeexResamplerState *m_resampler; // Speex state, created in the constructor
+    float *m_iin;      // interleaved input scratch, used when m_channels > 1
+    float *m_iout;     // interleaved output scratch, used when m_channels > 1
+    int m_channels;
+    int m_iinsize;     // capacity of m_iin, in floats
+    int m_ioutsize;    // capacity of m_iout, in floats
+    float m_lastratio; // ratio most recently passed to setRatio()
+    bool m_initial;    // true until the first setRatio() call
+    int m_debugLevel;
+
+    // Push a new ratio into the Speex resampler.
+    void setRatio(float);
+};
+
+// Create a Speex resampler for the given channel count, translating
+// the Resampler quality setting into a Speex quality level.  When
+// there is more than one channel and a maximum buffer size is given,
+// the interleaving scratch buffers are allocated up front.
+D_Speex::D_Speex(Resampler::Quality quality, int channels, int maxBufferSize,
+                 int debugLevel) :
+    m_resampler(0),
+    m_iin(0),
+    m_iout(0),
+    m_channels(channels),
+    m_iinsize(0),
+    m_ioutsize(0),
+    m_lastratio(1),
+    m_initial(true),
+    m_debugLevel(debugLevel)
+{
+    // Best -> 10, Fastest -> 0, anything else -> 4
+    int q = 4;
+    if (quality == Resampler::Best) {
+        q = 10;
+    } else if (quality == Resampler::Fastest) {
+        q = 0;
+    }
+
+    if (m_debugLevel > 0) {
+        std::cerr << "Resampler::Resampler: using Speex implementation with q = "
+                  << q << std::endl;
+    }
+
+    // Start out at a 1:1 ratio; the absolute 48000 rates are irrelevant
+    int err = 0;
+    m_resampler = speex_resampler_init_frac
+        (m_channels, 1, 1, 48000, 48000, q, &err);
+
+    if (err) {
+        std::cerr << "Resampler::Resampler: failed to create Speex resampler" 
+                  << std::endl;
+#ifndef NO_EXCEPTIONS
+        throw Resampler::ImplementationError;
+#endif
+    }
+
+    // Scratch buffers are only needed when (de)interleaving
+    if (maxBufferSize <= 0 || m_channels <= 1) {
+        return;
+    }
+
+    m_iinsize = maxBufferSize * m_channels;
+    m_ioutsize = m_iinsize * 2;
+    m_iin = allocate<float>(m_iinsize);
+    m_iout = allocate<float>(m_ioutsize);
+}
+
+// Release the Speex state and the interleaving scratch buffers.
+D_Speex::~D_Speex()
+{
+    // In a NO_EXCEPTIONS build the constructor reports a creation
+    // failure without throwing, so m_resampler may still be null
+    // here; don't hand a null state to speex_resampler_destroy().
+    if (m_resampler) {
+        speex_resampler_destroy(m_resampler);
+    }
+    deallocate<float>(m_iin);
+    deallocate<float>(m_iout);
+}
+
+// Configure the Speex resampler for the given output/input ratio.
+//
+// Speex takes the ratio as a fraction of two unsigned integers, so
+// the float is approximated by a fraction with one term pinned to a
+// large constant.  On the first call the resampler is also told to
+// skip its initial zeros.  m_lastratio is only updated once Speex has
+// accepted the new ratio, so a failed request is retried on the next
+// call.  On failure the error is reported and, unless NO_EXCEPTIONS
+// is defined, Resampler::ImplementationError is thrown.
+void
+D_Speex::setRatio(float ratio)
+{
+    // Speex wants a ratio of two unsigned integers, not a single
+    // float.  Let's do that.
+
+    unsigned int big = 272408136U;
+    unsigned int denom = 1, num = 1;
+
+    // The scaled term is clamped to at least 1 while it is still a
+    // double: a zero term is not a usable ratio, and converting a
+    // negative double to unsigned int is undefined.
+    if (ratio < 1.f) {
+        denom = big;
+        double dnum = double(big) * double(ratio);
+        num = (dnum >= 1.0 ? (unsigned int)dnum : 1U);
+    } else if (ratio > 1.f) {
+        num = big;
+        double ddenom = double(big) / double(ratio);
+        denom = (ddenom >= 1.0 ? (unsigned int)ddenom : 1U);
+    }
+    
+    if (m_debugLevel > 1) {
+        std::cerr << "D_Speex: Desired ratio " << ratio << ", requesting ratio "
+                  << num << "/" << denom << " = " << float(double(num)/double(denom))
+                  << std::endl;
+    }
+    
+    // Speex expresses the fraction as input/output, hence denom first
+    int err = speex_resampler_set_rate_frac
+        (m_resampler, denom, num, 48000, 48000);
+
+    if (err) {
+        std::cerr << "D_Speex: failed to set ratio " << num << "/" << denom
+                  << " (Speex error code " << err << ")" << std::endl;
+#ifndef NO_EXCEPTIONS
+        throw Resampler::ImplementationError;
+#endif
+        return;
+    }
+    
+    speex_resampler_get_ratio(m_resampler, &denom, &num);
+    
+    if (m_debugLevel > 1) {
+        std::cerr << "D_Speex: Desired ratio " << ratio << ", got ratio "
+                  << num << "/" << denom << " = " << float(double(num)/double(denom))
+                  << std::endl;
+    }
+    
+    m_lastratio = ratio;
+
+    if (m_initial) {
+        speex_resampler_skip_zeros(m_resampler);
+        m_initial = false;
+    }
+}
+
+// Resample incount frames of de-interleaved audio.
+//
+// in     - one input buffer per channel, incount frames each
+// out    - one output buffer per channel; the caller must provide
+//          room for ceil(incount * ratio) frames in each
+// ratio  - output/input length ratio; Speex is reconfigured whenever
+//          it differs from the previous call
+// final  - not used by this implementation
+//
+// Returns the number of output frames Speex reports having produced.
+// A Speex error is reported on stderr and, unless NO_EXCEPTIONS is
+// defined, raised as Resampler::ImplementationError.
+int
+D_Speex::resample(const float *const R__ *const R__ in,
+                  float *const R__ *const R__ out,
+                  int incount,
+                  float ratio,
+                  bool final)
+{
+    if (ratio != m_lastratio) {
+        setRatio(ratio);
+    }
+
+    unsigned int uincount = incount;
+    unsigned int outcount = lrintf(ceilf(incount * ratio)); //!!! inexact now
+
+    float *data_in, *data_out;
+
+    if (m_channels == 1) {
+        // Mono is already "interleaved": work on the caller's buffers
+        data_in = const_cast<float *>(*in);
+        data_out = *out;
+    } else {
+        // Grow the scratch buffers if this call needs more room, then
+        // interleave the input for Speex
+        if (incount * m_channels > m_iinsize) {
+            m_iin = reallocate<float>(m_iin, m_iinsize, incount * m_channels);
+            m_iinsize = incount * m_channels;
+        }
+        if (outcount * m_channels > m_ioutsize) {
+            m_iout = reallocate<float>(m_iout, m_ioutsize, outcount * m_channels);
+            m_ioutsize = outcount * m_channels;
+        }
+        v_interleave(m_iin, in, m_channels, incount);
+        data_in = m_iin;
+        data_out = m_iout;
+    }
+
+    // Speex updates uincount/outcount to the frame counts it
+    // actually consumed and produced
+    int err = speex_resampler_process_interleaved_float(m_resampler,
+                                                        data_in,
+                                                        &uincount,
+                                                        data_out,
+                                                        &outcount);
+
+//    if (incount != int(uincount)) {
+//        std::cerr << "Resampler: NOTE: Consumed " << uincount
+//                  << " of " << incount << " frames" << std::endl;
+//    }
+
+//    if (outcount != lrintf(ceilf(incount * ratio))) {
+//        std::cerr << "Resampler: NOTE: Obtained " << outcount
+//                  << " of " << lrintf(ceilf(incount * ratio)) << " frames"
+//                  << std::endl;
+//    }
+
+    if (err) {
+        std::cerr << "Resampler::process: Speex resampler error: "
+                  << err << std::endl;
+#ifndef NO_EXCEPTIONS
+        throw Resampler::ImplementationError;
+#endif
+    }
+
+    if (m_channels > 1) {
+        v_deinterleave(out, m_iout, m_channels, outcount);
+    }
+
+    return outcount;
+}
+
+// Resample incount frames of interleaved audio from "in" to "out".
+//
+// ratio is the output/input length ratio; Speex is reconfigured
+// whenever it differs from the previous call.  The caller must
+// provide room for ceil(incount * ratio) frames in "out".  "final" is
+// not used by this implementation.
+//
+// Returns the number of output frames Speex reports having produced.
+// A Speex error is reported on stderr and, unless NO_EXCEPTIONS is
+// defined, raised as Resampler::ImplementationError.
+int
+D_Speex::resampleInterleaved(const float *const R__ in,
+                             float *const R__ out,
+                             int incount,
+                             float ratio,
+                             bool final)
+{
+    if (ratio != m_lastratio) {
+        setRatio(ratio);
+    }
+
+    unsigned int uincount = incount;
+    unsigned int outcount = lrintf(ceilf(incount * ratio)); //!!! inexact now
+
+    float *data_in = const_cast<float *>(in);
+    float *data_out = out;
+
+    // Speex updates uincount/outcount to the frame counts it
+    // actually consumed and produced
+    int err = speex_resampler_process_interleaved_float(m_resampler,
+                                                        data_in,
+                                                        &uincount,
+                                                        data_out,
+                                                        &outcount);
+
+    if (err) {
+        std::cerr << "Resampler::process: Speex resampler error: "
+                  << err << std::endl;
+#ifndef NO_EXCEPTIONS
+        throw Resampler::ImplementationError;
+#endif
+    }
+
+    return outcount;
+}
+
+// Reset the Speex resampler by handing its state to
+// speex_resampler_reset_mem().  m_lastratio and m_initial are left
+// untouched.
+void
+D_Speex::reset()
+{
+    speex_resampler_reset_mem(m_resampler);
+}
+
+#endif

 } /* end namespace Resamplers */

@@ -435,6 +1044,12 @@ Resampler::Resampler(Resampler::Quality quality, int channels,
    switch (quality) {

    case Resampler::Best:
+#ifdef HAVE_IPP
+        m_method = 0;
+#endif
+#ifdef USE_SPEEX
+        m_method = 2;
+#endif
 #ifdef HAVE_LIBRESAMPLE
        m_method = 3;
 #endif
@@ -444,18 +1059,30 @@ Resampler::Resampler(Resampler::Quality quality, int channels,
        break;

    case Resampler::FastestTolerable:
+#ifdef HAVE_IPP
+        m_method = 0;
+#endif
 #ifdef HAVE_LIBRESAMPLE
        m_method = 3;
 #endif
 #ifdef HAVE_LIBSAMPLERATE
        m_method = 1;
+#endif
+#ifdef USE_SPEEX
+        m_method = 2;
 #endif
        break;

    case Resampler::Fastest:
+#ifdef HAVE_IPP
+        m_method = 0;
+#endif
 #ifdef HAVE_LIBRESAMPLE
        m_method = 3;
 #endif
+#ifdef USE_SPEEX
+        m_method = 2;
+#endif
 #ifdef HAVE_LIBSAMPLERATE
        m_method = 1;
 #endif
@@ -471,10 +1098,14 @@ Resampler::Resampler(Resampler::Quality quality, int channels,

    switch (m_method) {
    case 0:
+#ifdef HAVE_IPP
+        d = new Resamplers::D_IPP(quality, channels, maxBufferSize, debugLevel);
+#else
        std::cerr << "Resampler::Resampler(" << quality << ", " << channels
                  << ", " << maxBufferSize << "): No implementation available!"
                  << std::endl;
        abort();
+#endif
        break;

    case 1:
@@ -489,10 +1120,14 @@ Resampler::Resampler(Resampler::Quality quality, int channels,
        break;

    case 2:
+#ifdef USE_SPEEX
+        d = new Resamplers::D_Speex(quality, channels, maxBufferSize, debugLevel);
+#else
        std::cerr << "Resampler::Resampler(" << quality << ", " << channels
                  << ", " << maxBufferSize << "): No implementation available!"
                  << std::endl;
        abort();
+#endif
        break;

    case 3:
--- a/src/dsp/Resampler.h
+++ b/src/dsp/Resampler.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_RESAMPLER_H_
--- a/src/dsp/SampleFilter.h
+++ b/src/dsp/SampleFilter.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _SAMPLE_FILTER_H_
--- a/src/dsp/SincWindow.h
+++ b/src/dsp/SincWindow.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_SINC_WINDOW_H_
--- a/src/dsp/Window.cpp
+++ b/src/dsp/Window.cpp
@@ -1,17 +0,0 @@
-/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
-
-/*
-    Rubber Band
-    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2008 Chris Cannam.
-    
-    This program is free software; you can redistribute it and/or
-    modify it under the terms of the GNU General Public License as
-    published by the Free Software Foundation; either version 2 of the
-    License, or (at your option) any later version.  See the file
-    COPYING included with this distribution for more information.
-*/
-
-#include "Window.h"
-
-
--- a/src/dsp/Window.h
+++ b/src/dsp/Window.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_WINDOW_H_
--- a/src/getopt/getopt.h
+++ b/src/getopt/getopt.h
@@ -107,4 +107,4 @@ GETOPT_API int getopt __P((int, char * const *, const char *));
 __END_DECLS
 #endif
 
-#endif 
+#endif /* !_GETOPT_H_ */
--- a/src/jni/RubberBandStretcherJNI.cpp
+++ b/src/jni/RubberBandStretcherJNI.cpp
@@ -0,0 +1,370 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+/* Copyright Chris Cannam - All Rights Reserved */
+
+#include "rubberband/RubberBandStretcher.h"
+
+#include "system/Allocators.h"
+
+#include <jni.h>
+
+using namespace RubberBand;
+
+extern "C" {
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    dispose
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_dispose
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    reset
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_reset
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setTimeRatio
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setTimeRatio
+  (JNIEnv *, jobject, jdouble);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setPitchScale
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setPitchScale
+  (JNIEnv *, jobject, jdouble);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    getChannelCount
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_getChannelCount
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    getTimeRatio
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_getTimeRatio
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    getPitchScale
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_getPitchScale
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    getLatency
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_getLatency
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setTransientsOption
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setTransientsOption
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setDetectorOption
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setDetectorOption
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setPhaseOption
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setPhaseOption
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setFormantOption
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setFormantOption
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setPitchOption
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setPitchOption
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setExpectedInputDuration
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setExpectedInputDuration
+  (JNIEnv *, jobject, jlong);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    setMaxProcessSize
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_setMaxProcessSize
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    getSamplesRequired
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_getSamplesRequired
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    study
+ * Signature: ([[FZ)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_study
+  (JNIEnv *, jobject, jobjectArray, jboolean);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    process
+ * Signature: ([[FZ)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_process
+  (JNIEnv *, jobject, jobjectArray, jboolean);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    available
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_available
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    retrieve
+ * Signature: ([[F)I
+ */
+JNIEXPORT jint JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_retrieve
+  (JNIEnv *, jobject, jobjectArray);
+
+/*
+ * Class:     com_breakfastquay_rubberband_RubberBandStretcher
+ * Method:    initialise
+ * Signature: (IIIDD)V
+ */
+JNIEXPORT void JNICALL Java_com_breakfastquay_rubberband_RubberBandStretcher_initialise
+  (JNIEnv *, jobject, jint, jint, jint, jdouble, jdouble);
+
+}
+
+// Return the native stretcher whose address is stored in the Java
+// object's long "handle" field.
+RubberBandStretcher *
+getStretcher(JNIEnv *env, jobject obj)
+{
+    const jfieldID handleField =
+        env->GetFieldID(env->GetObjectClass(obj), "handle", "J");
+    return (RubberBandStretcher *)env->GetLongField(obj, handleField);
+}
+
+// Store the address of the given native stretcher in the Java
+// object's long "handle" field.
+void
+setStretcher(JNIEnv *env, jobject obj, RubberBandStretcher *stretcher)
+{
+    const jfieldID handleField =
+        env->GetFieldID(env->GetObjectClass(obj), "handle", "J");
+    env->SetLongField(obj, handleField, (jlong)stretcher);
+}
+
+// JNI entry point: construct a native stretcher from the given
+// settings and attach it to the Java object.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_initialise(JNIEnv *env, jobject obj, jint sampleRate, jint channels, jint options, jdouble initialTimeRatio, jdouble initialPitchScale)
+{
+    RubberBandStretcher *stretcher = new RubberBandStretcher
+        (sampleRate, channels, options, initialTimeRatio, initialPitchScale);
+    setStretcher(env, obj, stretcher);
+}
+
+// JNI entry point: destroy the native stretcher attached to the Java
+// object and zero the stored handle.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_dispose(JNIEnv *env, jobject obj)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    delete stretcher;
+    setStretcher(env, obj, 0);
+}
+
+// JNI entry point: forwards to reset() on the attached stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_reset(JNIEnv *env, jobject obj)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->reset();
+}
+
+// JNI entry point: forwards to setTimeRatio() on the attached stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setTimeRatio(JNIEnv *env, jobject obj, jdouble ratio)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->setTimeRatio(ratio);
+}
+
+// JNI entry point: forwards to setPitchScale() on the attached stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setPitchScale(JNIEnv *env, jobject obj, jdouble scale)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->setPitchScale(scale);
+}
+
+// JNI entry point: returns getChannelCount() of the attached stretcher.
+jint
+Java_com_breakfastquay_rubberband_RubberBandStretcher_getChannelCount(JNIEnv *env, jobject obj)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    return stretcher->getChannelCount();
+}
+
+// JNI entry point: returns getTimeRatio() of the attached stretcher.
+jdouble
+Java_com_breakfastquay_rubberband_RubberBandStretcher_getTimeRatio(JNIEnv *env, jobject obj)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    return stretcher->getTimeRatio();
+}
+
+// JNI entry point: returns getPitchScale() of the attached stretcher.
+jdouble
+Java_com_breakfastquay_rubberband_RubberBandStretcher_getPitchScale(JNIEnv *env, jobject obj)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    return stretcher->getPitchScale();
+}
+
+// JNI entry point: returns getLatency() of the attached stretcher.
+jint
+Java_com_breakfastquay_rubberband_RubberBandStretcher_getLatency(JNIEnv *env, jobject obj)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    return stretcher->getLatency();
+}
+
+// JNI entry point: forwards to setTransientsOption() on the attached
+// stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setTransientsOption(JNIEnv *env, jobject obj, jint options)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->setTransientsOption(options);
+}
+
+// JNI entry point: forwards to setDetectorOption() on the attached
+// stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setDetectorOption(JNIEnv *env, jobject obj, jint options)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->setDetectorOption(options);
+}
+
+// JNI entry point: forwards to setPhaseOption() on the attached
+// stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setPhaseOption(JNIEnv *env, jobject obj, jint options)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->setPhaseOption(options);
+}
+
+// JNI entry point: forwards to setFormantOption() on the attached
+// stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setFormantOption(JNIEnv *env, jobject obj, jint options)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->setFormantOption(options);
+}
+
+// JNI entry point: forwards to setPitchOption() on the attached
+// stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setPitchOption(JNIEnv *env, jobject obj, jint options)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->setPitchOption(options);
+}
+
+// JNI entry point: forwards to setExpectedInputDuration() on the
+// attached stretcher.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setExpectedInputDuration(JNIEnv *env, jobject obj, jlong duration)
+{
+    getStretcher(env, obj)->setExpectedInputDuration(duration);
+}
+
+// JNI entry point: forwards to setMaxProcessSize() on the attached
+// stretcher.  This native method is declared in the prototype block
+// at the top of this file but previously had no definition, so the
+// Java side had nothing to bind it to.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_setMaxProcessSize(JNIEnv *env, jobject obj, jint size)
+{
+    getStretcher(env, obj)->setMaxProcessSize(size);
+}
+
+// JNI entry point: returns getSamplesRequired() of the attached
+// stretcher.
+jint
+Java_com_breakfastquay_rubberband_RubberBandStretcher_getSamplesRequired(JNIEnv *env, jobject obj)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    return stretcher->getSamplesRequired();
+}
+
+// JNI entry point: pass a block of audio to the stretcher's study()
+// call.
+//
+// data  - Java float[][], one float[] per channel; the frame count is
+//         taken from the length of the last channel's array
+// final - forwarded unchanged to study()
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_study(JNIEnv *env, jobject obj, jobjectArray data, jboolean final)
+{
+    int channels = env->GetArrayLength(data);
+    // Table of per-channel element pointers; released below (it was
+    // previously allocated with new[] and never freed)
+    float **input = allocate<float *>(channels);
+    int samples = 0;
+    for (int c = 0; c < channels; ++c) {
+        jfloatArray cdata = (jfloatArray)env->GetObjectArrayElement(data, c);
+        samples = env->GetArrayLength(cdata);
+        input[c] = env->GetFloatArrayElements(cdata, 0);
+    }
+
+    getStretcher(env, obj)->study(input, samples, final);
+
+    // Hand every channel's elements back to the JVM
+    for (int c = 0; c < channels; ++c) {
+        jfloatArray cdata = (jfloatArray)env->GetObjectArrayElement(data, c);
+        env->ReleaseFloatArrayElements(cdata, input[c], 0);
+    }
+
+    deallocate(input);
+}
+
+// JNI entry point: pass a block of audio to the stretcher's process()
+// call.  "data" is a Java float[][] with one float[] per channel; the
+// frame count is taken from the length of the last channel's array.
+void
+Java_com_breakfastquay_rubberband_RubberBandStretcher_process(JNIEnv *env, jobject obj, jobjectArray data, jboolean final)
+{
+    const int channelCount = env->GetArrayLength(data);
+    float **channelData = allocate<float *>(channelCount);
+
+    // Obtain each channel's elements from the JVM
+    int frameCount = 0;
+    for (int ch = 0; ch < channelCount; ++ch) {
+        jfloatArray column = (jfloatArray)env->GetObjectArrayElement(data, ch);
+        frameCount = env->GetArrayLength(column);
+        channelData[ch] = env->GetFloatArrayElements(column, 0);
+    }
+
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    stretcher->process(channelData, frameCount, final);
+
+    // Hand every channel's elements back to the JVM
+    for (int ch = 0; ch < channelCount; ++ch) {
+        jfloatArray column = (jfloatArray)env->GetObjectArrayElement(data, ch);
+        env->ReleaseFloatArrayElements(column, channelData[ch], 0);
+    }
+
+    deallocate(channelData);
+}
+
+// JNI entry point: returns available() of the attached stretcher.
+jint
+Java_com_breakfastquay_rubberband_RubberBandStretcher_available(JNIEnv *env, jobject obj)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    return stretcher->available();
+}
+
+// JNI entry point: retrieve processed audio into the Java float[][]
+// "output".  The length of output[0] is used as the capacity for
+// every channel.  Returns the value returned by the stretcher's
+// retrieve() call.
+jint
+Java_com_breakfastquay_rubberband_RubberBandStretcher_retrieve(JNIEnv *env, jobject obj, jobjectArray output)
+{
+    RubberBandStretcher *stretcher = getStretcher(env, obj);
+    size_t channels = stretcher->getChannelCount();
+
+    // The first channel's array length sets the capacity
+    jfloatArray firstChannel = (jfloatArray)env->GetObjectArrayElement(output, 0);
+    int capacity = env->GetArrayLength(firstChannel);
+    env->DeleteLocalRef(firstChannel);
+
+    // Retrieve into native scratch buffers, then copy out per channel
+    float **scratch = allocate_channels<float>(channels, capacity);
+    size_t got = stretcher->retrieve(scratch, capacity);
+
+    int c = 0;
+    while (c < channels) {
+        jfloatArray target = (jfloatArray)env->GetObjectArrayElement(output, c);
+        env->SetFloatArrayRegion(target, 0, got, scratch[c]);
+        env->DeleteLocalRef(target);
+        ++c;
+    }
+
+    deallocate_channels(scratch, channels);
+    return got;
+}
+
--- a/src/kissfft/COPYING
+++ b/src/kissfft/COPYING
@@ -0,0 +1,11 @@
+Copyright (c) 2003-2004 Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/src/kissfft/_kiss_fft_guts.h
+++ b/src/kissfft/_kiss_fft_guts.h
@@ -0,0 +1,150 @@
+/*
+Copyright (c) 2003-2004, Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* kiss_fft.h
+   defines kiss_fft_scalar as either short or a float type
+   and defines
+   typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
+#include "kiss_fft.h"
+#include <limits.h>
+
+#define MAXFACTORS 32
+/* e.g. an fft of length 128 has 4 factors 
+ as far as kissfft is concerned
+ 4*4*4*2
+ */
+
+/* State shared by the kiss_fft routines.
+   NOTE(review): the field notes below are inferred from names and
+   sizes only; the code that fills them in is not in this header --
+   confirm against kiss_fft.c. */
+struct kiss_fft_state{
+    int nfft;                    /* presumably the transform length */
+    int inverse;                 /* presumably non-zero for an inverse transform */
+    int factors[2*MAXFACTORS];   /* room for two ints per factor, MAXFACTORS factors at most */
+    kiss_fft_cpx twiddles[1];    /* NOTE(review): looks like a trailing array sized at allocation time -- confirm */
+};
+
+/*
+  Explanation of macros dealing with complex math:
+
+   C_MUL(m,a,b)         : m = a*b
+   C_FIXDIV( c , div )  : if a fixed point impl., c /= div. noop otherwise
+   C_SUB( res, a,b)     : res = a - b
+   C_SUBFROM( res , a)  : res -= a
+   C_ADDTO( res , a)    : res += a
+ * */
+#ifdef FIXED_POINT
+/* Fixed-point build: FRACBITS fractional bits per sample, with
+   SAMPPROD a type wide enough to hold the product of two samples. */
+#if (FIXED_POINT==32)
+# define FRACBITS 31
+# define SAMPPROD int64_t
+#define SAMP_MAX 2147483647
+#else
+# define FRACBITS 15
+# define SAMPPROD int32_t 
+#define SAMP_MAX 32767
+#endif
+
+/* Parenthesised so that it expands safely inside any expression */
+#define SAMP_MIN (-SAMP_MAX)
+
+/* With CHECK_OVERFLOW defined, warn on stderr whenever (a op b),
+   evaluated in SAMPPROD, leaves [SAMP_MIN, SAMP_MAX].  The result is
+   printed as long long because SAMPPROD is int32_t or int64_t, and
+   neither is guaranteed to match %ld. */
+#if defined(CHECK_OVERFLOW)
+#  define CHECK_OVERFLOW_OP(a,op,b)  \
+	if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
+		fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %lld\n",__LINE__,(int)(a),(int)(b),(long long)((SAMPPROD)(a) op (SAMPPROD)(b)) );  }
+#endif
+
+
+/* smul: widening multiply; sround: round and shift a SAMPPROD value
+   back down to a sample */
+#   define smul(a,b) ( (SAMPPROD)(a)*(b) )
+#   define sround( x )  (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
+
+#   define S_MUL(a,b) sround( smul(a,b) )
+
+/* m = a*b for complex a, b */
+#   define C_MUL(m,a,b) \
+      do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
+          (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
+
+/* x /= k, done as a multiply by SAMP_MAX/k.  Both arguments are
+   parenthesised so that expression arguments (e.g. n+1) divide as a
+   whole. */
+#   define DIVSCALAR(x,k) \
+	(x) = sround( smul( (x), SAMP_MAX/(k) ) )
+
+#   define C_FIXDIV(c,div) \
+	do {    DIVSCALAR( (c).r , div);  \
+		DIVSCALAR( (c).i  , div); }while (0)
+
+/* c *= s for complex c and scalar s */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r =  sround( smul( (c).r , s ) ) ;\
+        (c).i =  sround( smul( (c).i , s ) ) ; }while(0)
+
+#else  /* not FIXED_POINT*/
+
+/* Floating-point build: plain arithmetic, and C_FIXDIV does nothing */
+#   define S_MUL(a,b) ( (a)*(b) )
+#define C_MUL(m,a,b) \
+    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
+        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
+#   define C_FIXDIV(c,div) /* NOOP */
+#   define C_MULBYSCALAR( c, s ) \
+    do{ (c).r *= (s);\
+        (c).i *= (s); }while(0)
+#endif
+
+/* Unless the fixed-point CHECK_OVERFLOW build defined it above, the
+   overflow check expands to nothing. */
+#ifndef CHECK_OVERFLOW_OP
+#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
+#endif
+
+/* res = a + b, component-wise on .r and .i */
+#define  C_ADD( res, a,b)\
+    do { \
+	    CHECK_OVERFLOW_OP((a).r,+,(b).r)\
+	    CHECK_OVERFLOW_OP((a).i,+,(b).i)\
+	    (res).r=(a).r+(b).r;  (res).i=(a).i+(b).i; \
+    }while(0)
+/* res = a - b, component-wise on .r and .i */
+#define  C_SUB( res, a,b)\
+    do { \
+	    CHECK_OVERFLOW_OP((a).r,-,(b).r)\
+	    CHECK_OVERFLOW_OP((a).i,-,(b).i)\
+	    (res).r=(a).r-(b).r;  (res).i=(a).i-(b).i; \
+    }while(0)
+/* res += a, component-wise on .r and .i */
+#define C_ADDTO( res , a)\
+    do { \
+	    CHECK_OVERFLOW_OP((res).r,+,(a).r)\
+	    CHECK_OVERFLOW_OP((res).i,+,(a).i)\
+	    (res).r += (a).r;  (res).i += (a).i;\
+    }while(0)
+
+/* res -= a, component-wise on .r and .i */
+#define C_SUBFROM( res , a)\
+    do {\
+	    CHECK_OVERFLOW_OP((res).r,-,(a).r)\
+	    CHECK_OVERFLOW_OP((res).i,-,(a).i)\
+	    (res).r -= (a).r;  (res).i -= (a).i; \
+    }while(0)
+
+
+/* Cosine/sine of a phase and halving, in the form each build needs:
+   - FIXED_POINT: cos/sin scaled by SAMP_MAX and rounded with
+     floor(.5 + ...); halving is a right shift by one
+   - USE_SIMD: cos/sin passed through _mm_set1_ps(); halving is a
+     multiply by _mm_set1_ps(.5)
+   - otherwise: cos/sin cast to kiss_fft_scalar; halving is a multiply
+     by .5 */
+#ifdef FIXED_POINT
+#  define KISS_FFT_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
+#  define KISS_FFT_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
+#  define HALF_OF(x) ((x)>>1)
+#elif defined(USE_SIMD)
+#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
+#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
+#  define HALF_OF(x) ((x)*_mm_set1_ps(.5))
+#else
+#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
+#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
+#  define HALF_OF(x) ((x)*.5)
+#endif
+
+/* Through the pointer x, set r = cos(phase) and i = sin(phase) */
+#define  kf_cexp(x,phase) \
+	do{ \
+		(x)->r = KISS_FFT_COS(phase);\
+		(x)->i = KISS_FFT_SIN(phase);\
+	}while(0)
+
+
+/* Debugging aid: print the complex value that c points to on stderr
+   as "r + ii", with both parts converted to double. */
+#define pcpx(c)\
+    fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) )
--- a/src/kissfft/kiss_fft.c
+++ b/src/kissfft/kiss_fft.c
@@ -0,0 +1,399 @@
+/*
+Copyright (c) 2003-2004, Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "_kiss_fft_guts.h"
+/* The guts header contains all the multiplication and addition macros that are defined for
+ fixed or floating point complex numbers.  It also declares the kf_ internal functions.
+ */
+
+static kiss_fft_cpx *scratchbuf=NULL; /* work buffer used by kf_bfly_generic */
+static size_t nscratchbuf=0;          /* capacity of scratchbuf, in elements */
+static kiss_fft_cpx *tmpbuf=NULL;     /* staging buffer used by kiss_fft_stride when fin == fout */
+static size_t ntmpbuf=0;              /* capacity of tmpbuf, in elements */
+/* CHECKBUF(buf,nbuf,n): make buf hold at least n elements; old contents are discarded, not copied. If the allocation fails, buf is NULL and nbuf is reset to 0 so that the next call retries rather than trusting a stale capacity (callers in this file still use buf without checking it). buf and nbuf must be lvalues; n is expanded more than once. */
+#define CHECKBUF(buf,nbuf,n) \
+    do { \
+        if ( (nbuf) < (size_t)(n) ) {\
+            free(buf); \
+            (buf) = (kiss_fft_cpx*)KISS_FFT_MALLOC(sizeof(kiss_fft_cpx)*(n)); \
+            (nbuf) = (buf) ? (size_t)(n) : 0; \
+        } \
+   }while(0)
+
+/* Radix-2 butterfly, in place: for each of m positions, combines Fout[k] with the twiddled Fout[k+m]. */
+static void kf_bfly2(
+        kiss_fft_cpx * Fout,   /* in/out: 2*m elements, overwritten with the results */
+        const size_t fstride,  /* step through st->twiddles between successive positions */
+        const kiss_fft_cfg st, /* supplies the twiddle table */
+        int m                  /* number of butterflies; must be >= 1 because the loop tests after the body */
+        )
+{
+    kiss_fft_cpx * Fout2;
+    kiss_fft_cpx * tw1 = st->twiddles;
+    kiss_fft_cpx t;
+    Fout2 = Fout + m; /* cursor over the second half */
+    do{
+        C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
+        /* t = second-half input times twiddle; outputs are (first + t) and (first - t) */
+        C_MUL (t,  *Fout2 , *tw1);
+        tw1 += fstride;
+        C_SUB( *Fout2 ,  *Fout , t ); /* reads *Fout, so it has to run before the update below */
+        C_ADDTO( *Fout ,  t );
+        ++Fout2;
+        ++Fout;
+    }while (--m);
+}
+/* Radix-4 butterfly, in place, over the four length-m quarters of Fout; st->inverse selects the sign of the j rotation applied to scratch[4]. */
+static void kf_bfly4(
+        kiss_fft_cpx * Fout,   /* in/out: 4*m elements */
+        const size_t fstride,  /* base twiddle step; tw1, tw2, tw3 advance by 1x, 2x, 3x this */
+        const kiss_fft_cfg st, /* twiddle table and inverse flag */
+        const size_t m         /* number of butterflies; the do/while body always runs at least once */
+        )
+{
+    kiss_fft_cpx *tw1,*tw2,*tw3;
+    kiss_fft_cpx scratch[6];
+    size_t k=m;
+    const size_t m2=2*m;
+    const size_t m3=3*m;
+    /* all three twiddle cursors start at the head of the table */
+    tw3 = tw2 = tw1 = st->twiddles;
+
+    do {
+        C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
+        /* scratch[0..2]: inputs from quarters 1, 2 and 3, each times its twiddle */
+        C_MUL(scratch[0],Fout[m] , *tw1 );
+        C_MUL(scratch[1],Fout[m2] , *tw2 );
+        C_MUL(scratch[2],Fout[m3] , *tw3 );
+        /* scratch[5] = in0 - s1, *Fout = in0 + s1, scratch[3] = s0 + s2, scratch[4] = s0 - s2 */
+        C_SUB( scratch[5] , *Fout, scratch[1] );
+        C_ADDTO(*Fout, scratch[1]);
+        C_ADD( scratch[3] , scratch[0] , scratch[2] );
+        C_SUB( scratch[4] , scratch[0] , scratch[2] );
+        C_SUB( Fout[m2], *Fout, scratch[3] ); /* uses the partially updated *Fout (in0 + s1) */
+        tw1 += fstride;
+        tw2 += fstride*2;
+        tw3 += fstride*3;
+        C_ADDTO( *Fout , scratch[3] );
+        /* remaining two outputs are scratch[5] +/- j*scratch[4]; the sign pairing flips for the inverse */
+        if(st->inverse) {
+            Fout[m].r = scratch[5].r - scratch[4].i;
+            Fout[m].i = scratch[5].i + scratch[4].r;
+            Fout[m3].r = scratch[5].r + scratch[4].i;
+            Fout[m3].i = scratch[5].i - scratch[4].r;
+        }else{
+            Fout[m].r = scratch[5].r + scratch[4].i;
+            Fout[m].i = scratch[5].i - scratch[4].r;
+            Fout[m3].r = scratch[5].r - scratch[4].i;
+            Fout[m3].i = scratch[5].i + scratch[4].r;
+        }
+        ++Fout;
+    }while(--k);
+}
+/* Radix-3 butterfly, in place, over the three length-m thirds of Fout. */
+static void kf_bfly3(
+         kiss_fft_cpx * Fout,   /* in/out: 3*m elements */
+         const size_t fstride,  /* base twiddle step; tw1 and tw2 advance by 1x and 2x this */
+         const kiss_fft_cfg st, /* supplies the twiddle table */
+         size_t m               /* number of butterflies; the do/while body always runs at least once */
+         )
+{
+     size_t k=m;
+     const size_t m2 = 2*m;
+     kiss_fft_cpx *tw1,*tw2;
+     kiss_fft_cpx scratch[5];
+     kiss_fft_cpx epi3;
+     epi3 = st->twiddles[fstride*m]; /* only its imaginary part is used below */
+
+     tw1=tw2=st->twiddles;
+
+     do{
+         C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
+         /* scratch[1], scratch[2]: inputs from the second and third block, each times its twiddle */
+         C_MUL(scratch[1],Fout[m] , *tw1);
+         C_MUL(scratch[2],Fout[m2] , *tw2);
+         /* scratch[3] = s1 + s2, scratch[0] = s1 - s2 */
+         C_ADD(scratch[3],scratch[1],scratch[2]);
+         C_SUB(scratch[0],scratch[1],scratch[2]);
+         tw1 += fstride;
+         tw2 += fstride*2;
+         /* Fout[m] temporarily holds in0 - (s1 + s2)/2 */
+         Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
+         Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+
+         C_MULBYSCALAR( scratch[0] , epi3.i );
+         /* first output: in0 + s1 + s2 */
+         C_ADDTO(*Fout,scratch[3]);
+         /* third output: the temporary minus j*scratch[0] */
+         Fout[m2].r = Fout[m].r + scratch[0].i;
+         Fout[m2].i = Fout[m].i - scratch[0].r;
+         /* second output: the temporary plus j*scratch[0] */
+         Fout[m].r -= scratch[0].i;
+         Fout[m].i += scratch[0].r;
+
+         ++Fout;
+     }while(--k);
+}
+/* Radix-5 butterfly, in place, over the five length-m fifths of Fout. */
+static void kf_bfly5(
+        kiss_fft_cpx * Fout,   /* in/out: 5*m elements */
+        const size_t fstride,  /* base twiddle step for this stage */
+        const kiss_fft_cfg st, /* supplies the twiddle table */
+        int m                  /* number of butterflies; the loop runs u = 0 .. m-1 */
+        )
+{
+    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+    int u;
+    kiss_fft_cpx scratch[13];
+    kiss_fft_cpx * twiddles = st->twiddles;
+    kiss_fft_cpx *tw;
+    kiss_fft_cpx ya,yb;
+    ya = twiddles[fstride*m];   /* two fixed table entries used as rotation constants below */
+    yb = twiddles[fstride*2*m];
+    /* one cursor per fifth of the stage */
+    Fout0=Fout;
+    Fout1=Fout0+m;
+    Fout2=Fout0+2*m;
+    Fout3=Fout0+3*m;
+    Fout4=Fout0+4*m;
+
+    tw=st->twiddles;
+    for ( u=0; u<m; ++u ) {
+        C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
+        scratch[0] = *Fout0; /* keep the untouched first input; *Fout0 is overwritten below */
+        /* scratch[1..4]: inputs 1..4 times twiddles at indices u*fstride, 2*u*fstride, ... */
+        C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+        C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+        C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+        C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+        /* sums and differences of the pairs (1,4) and (2,3) */
+        C_ADD( scratch[7],scratch[1],scratch[4]);
+        C_SUB( scratch[10],scratch[1],scratch[4]);
+        C_ADD( scratch[8],scratch[2],scratch[3]);
+        C_SUB( scratch[9],scratch[2],scratch[3]);
+        /* output 0: sum of all five twiddled inputs */
+        Fout0->r += scratch[7].r + scratch[8].r;
+        Fout0->i += scratch[7].i + scratch[8].i;
+        /* scratch[5] and scratch[6] combine into outputs 1 and 4 */
+        scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
+        scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+
+        scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
+        scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+
+        C_SUB(*Fout1,scratch[5],scratch[6]);
+        C_ADD(*Fout4,scratch[5],scratch[6]);
+        /* scratch[11] and scratch[12] combine into outputs 2 and 3 (ya and yb swap roles) */
+        scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
+        scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
+        scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
+        scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+
+        C_ADD(*Fout2,scratch[11],scratch[12]);
+        C_SUB(*Fout3,scratch[11],scratch[12]);
+
+        ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+    }
+}
+
+/* perform the butterfly for one stage of a mixed radix FFT, for any radix p (kf_work uses it for radices other than 2, 3, 4 and 5) */
+static void kf_bfly_generic(
+        kiss_fft_cpx * Fout,   /* in/out: p*m elements */
+        const size_t fstride,  /* twiddle step for this stage */
+        const kiss_fft_cfg st, /* twiddle table and total transform length */
+        int m,                 /* length of each of the p sub-blocks */
+        int p                  /* the radix */
+        )
+{
+    int u,k,q1,q;
+    kiss_fft_cpx * twiddles = st->twiddles;
+    kiss_fft_cpx t;
+    int Norig = st->nfft;
+    /* scratchbuf is the shared file-scope buffer; NOTE(review): it is used below without a NULL check */
+    CHECKBUF(scratchbuf,nscratchbuf,p);
+
+    for ( u=0; u<m; ++u ) {
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) { /* gather the p inputs for position u (spaced m apart) */
+            scratchbuf[q1] = Fout[ k  ];
+            C_FIXDIV(scratchbuf[q1],p);
+            k += m;
+        }
+
+        k=u;
+        for ( q1=0 ; q1<p ; ++q1 ) { /* produce output k = u + q1*m */
+            int twidx=0;
+            Fout[ k ] = scratchbuf[0];
+            for (q=1;q<p;++q ) {
+                twidx += fstride * k; /* running index q*fstride*k, kept below Norig */
+                if (twidx>=Norig) twidx-=Norig; /* one subtraction per step; presumably fstride*k < Norig -- confirm */
+                C_MUL(t,scratchbuf[q] , twiddles[twidx] );
+                C_ADDTO( Fout[ k ] ,t);
+            }
+            k += m;
+        }
+    }
+}
+/* Recursive worker: fills Fout with p sub-results of length m (by copying input when m == 1, otherwise by recursing), then merges them with the radix-p butterfly. */
+static
+void kf_work(
+        kiss_fft_cpx * Fout,      /* output for this level: p*m elements */
+        const kiss_fft_cpx * f,   /* input samples */
+        const size_t fstride,     /* stride at this level; multiplied by p for the next level down */
+        int in_stride,            /* extra spacing between input samples, as passed by the caller */
+        int * factors,            /* (p,m) pairs; two entries are consumed per level */
+        const kiss_fft_cfg st     /* forwarded to the butterflies */
+        )
+{
+    kiss_fft_cpx * Fout_beg=Fout;
+    const int p=*factors++; /* the radix  */
+    const int m=*factors++; /* fft length of this stage divided by p */
+    const kiss_fft_cpx * Fout_end = Fout + p*m;
+
+    if (m==1) {
+        do{ /* last level: copy p input samples, spaced fstride*in_stride apart */
+            *Fout = *f;
+            f += fstride*in_stride;
+        }while(++Fout != Fout_end );
+    }else{
+        do{ /* p recursive calls, each writing m outputs and starting one input step later */
+            kf_work( Fout , f, fstride*p, in_stride, factors,st);
+            f += fstride*in_stride;
+        }while( (Fout += m) != Fout_end );
+    }
+
+    Fout=Fout_beg;
+    /* merge the p sub-results in place */
+    switch (p) {
+        case 2: kf_bfly2(Fout,fstride,st,m); break;
+        case 3: kf_bfly3(Fout,fstride,st,m); break; 
+        case 4: kf_bfly4(Fout,fstride,st,m); break;
+        case 5: kf_bfly5(Fout,fstride,st,m); break; 
+        default: kf_bfly_generic(Fout,fstride,st,m,p); break;
+    }
+}
+
+/*  kf_factor: split n into the (radix, remainder) pairs consumed by kf_work.
+    facbuf receives p1,m1,p2,m2, ... where each pair satisfies
+    p[i] * m[i] = m[i-1], starting from m0 = n.
+    Radix 4 is tried first, then 2, then 3, then successive odd numbers. */
+static 
+void kf_factor(int n,int * facbuf)
+{
+    /* taken once from the incoming n; it is not refreshed as n shrinks */
+    const double root_limit = floor( sqrt((double)n) );
+    int radix = 4;
+    do {
+        /* move to the next candidate until one divides what is left of n */
+        while (n % radix != 0) {
+            if (radix == 4)
+                radix = 2;
+            else if (radix == 2)
+                radix = 3;
+            else
+                radix += 2;
+            if (radix > root_limit)
+                radix = n;      /* no more factors: take the remainder whole */
+        }
+        n /= radix;
+        *facbuf++ = radix;      /* p: radix of this stage */
+        *facbuf++ = n;          /* m: length left after this stage */
+    } while (n > 1);
+}
+
+/*
+ * kiss_fft_alloc: set up the state for an nfft-point transform (the inverse transform when inverse_fft is nonzero).
+ * If lenmem is NULL the state is obtained from KISS_FFT_MALLOC. Otherwise mem is used when it is non-NULL and
+ * *lenmem is at least the size needed, and *lenmem is overwritten with that size in every case.
+ * Returns the state, or NULL when no storage was obtained. The state is one contiguous block, so a block
+ * allocated here can be released with a single free().
+ * */
+kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
+{
+    kiss_fft_cfg st=NULL;
+    size_t memneeded = sizeof(struct kiss_fft_state)
+        + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors; nfft-1 presumably because the struct already holds one -- TODO confirm */
+    /* NOTE(review): nfft is not validated here; for nfft < 1 the (nfft-1) term is converted to size_t and wraps */
+    if ( lenmem==NULL ) {
+        st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
+    }else{
+        if (mem != NULL && *lenmem >= memneeded)
+            st = (kiss_fft_cfg)mem;
+        *lenmem = memneeded; /* reported whether or not mem was usable */
+    }
+    if (st) {
+        int i;
+        st->nfft=nfft;
+        st->inverse = inverse_fft;
+        /* twiddle i gets phase -2*pi*i/nfft, with the sign flipped for the inverse transform */
+        for (i=0;i<nfft;++i) {
+            const double pi=3.141592653589793238462643383279502884197169399375105820974944;
+            double phase = -2*pi*i / nfft;
+            if (st->inverse)
+                phase *= -1;
+            kf_cexp(st->twiddles+i, phase );
+        }
+        /* record the radix/length pairs that kf_work walks */
+        kf_factor(nfft,st->factors);
+    }
+    return st;
+}
+
+
+
+/* Transform st->nfft points read from fin with input stride in_stride into fout; when fin and fout are the same pointer the result is built in tmpbuf and then copied to fout. */
+void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
+{
+    if (fin != fout) {
+        kf_work( fout, fin, 1,in_stride, st->factors,st ); /* distinct buffers: write the output directly */
+        return;
+    }
+    CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
+    kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
+    memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
+}
+/* Convenience wrapper: kiss_fft_stride with an input stride of 1. */
+void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+    kiss_fft_stride(cfg,fin,fout,1);
+}
+
+
+/* Optional: release the two file-scope work buffers managed by CHECKBUF and
+   reset their recorded capacities, so that a later transform allocates them afresh.
+ */
+void kiss_fft_cleanup(void)
+{
+    free(tmpbuf);
+    tmpbuf = NULL;
+    ntmpbuf = 0;
+    free(scratchbuf);
+    scratchbuf = NULL;
+    nscratchbuf = 0;
+}
+/* Returns the smallest k >= n whose only prime factors are 2, 3 and 5. Arguments below 1 are clamped to 1: without the clamp n == 0 never terminates (0 stays divisible by 2) and a negative n is handed back unchanged. */
+int kiss_fft_next_fast_size(int n)
+{
+    if (n < 1) n = 1;
+    for (;; n++) {
+        int m=n;
+        while ( (m%2) == 0 ) m/=2;
+        while ( (m%3) == 0 ) m/=3;
+        while ( (m%5) == 0 ) m/=5;
+        if (m<=1)
+            break; /* n is completely factorable by twos, threes, and fives */
+    }
+    return n;
+}
--- a/src/kissfft/kiss_fft.h
+++ b/src/kissfft/kiss_fft.h
@@ -0,0 +1,121 @@
+#ifndef KISS_FFT_H
+#define KISS_FFT_H
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <memory.h>
+#ifndef __APPLE__
+#include <malloc.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ ATTENTION!
+ If you would like any of the following:
+ -- a utility that will handle the caching of fft objects
+ -- real-only (no imaginary time component ) FFT
+ -- a multi-dimensional FFT
+ -- a command-line utility to perform ffts
+ -- a command-line utility to perform fast-convolution filtering
+
+ then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
+  in the tools/ directory.
+*/
+
+#ifdef USE_SIMD /* SIMD build: kiss_fft_scalar is an __m128 */
+# include <xmmintrin.h>
+# define kiss_fft_scalar __m128
+#define KISS_FFT_MALLOC(nbytes) memalign(16,nbytes) /* allocate with 16-byte alignment */
+#else	/* otherwise plain malloc is used */
+#define KISS_FFT_MALLOC malloc
+#endif	
+
+/* Choose the sample type: int32_t or int16_t for FIXED_POINT builds, otherwise whatever kiss_fft_scalar was already defined as, falling back to float. */
+#ifdef FIXED_POINT
+#include <sys/types.h>	
+# if (FIXED_POINT == 32) /* 32-bit samples only when FIXED_POINT is exactly 32 */
+#  define kiss_fft_scalar int32_t
+# else	/* any other FIXED_POINT value selects 16-bit samples */
+#  define kiss_fft_scalar int16_t
+# endif
+#else
+# ifndef kiss_fft_scalar
+/*  default is float; an earlier definition (for example the USE_SIMD one) is left untouched */
+#   define kiss_fft_scalar float
+# endif
+#endif
+/* One complex sample. */
+typedef struct {
+    kiss_fft_scalar r; /* real part */
+    kiss_fft_scalar i; /* imaginary part */
+}kiss_fft_cpx;
+/* Opaque handle to a transform configuration; the struct itself is not defined in this header. */
+typedef struct kiss_fft_state* kiss_fft_cfg;
+
+/* 
+ *  kiss_fft_alloc
+ *  
+ *  Initialize the cfg/state buffer for an nfft-point FFT, or for the
+ *  inverse FFT when inverse_fft is nonzero.
+ *  typical usage:      kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
+ *
+ *  The return value from kiss_fft_alloc is a cfg buffer used internally
+ *  by the fft routine, or NULL if no buffer could be set up.
+ *
+ *  If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using KISS_FFT_MALLOC.
+ *  The returned value should be free()d when done to avoid memory leaks.
+ *  
+ *  The state can instead be placed in a user supplied buffer, mem:
+ *  If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
+ *      then the function places the cfg in mem and the size used in *lenmem
+ *      and returns mem.
+ *  
+ *  If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
+ *      then the function returns NULL and places the minimum cfg 
+ *      buffer size in *lenmem.
+ * */
+
+kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); 
+
+/*
+ * kiss_fft(cfg,fin,fout)
+ *
+ * Perform an FFT on a complex input buffer, using a cfg from kiss_fft_alloc.
+ * for a forward FFT,
+ * fin should be  f[0] , f[1] , ... ,f[nfft-1]
+ * fout will be   F[0] , F[1] , ... ,F[nfft-1]
+ * Note that each element is complex and can be accessed like
+    f[k].r and f[k].i
+ * */
+void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
+
+/*
+ A more generic version of the above function. It reads its input from every Nth sample, where N is fin_stride.
+ * */
+void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
+
+/* If kiss_fft_alloc allocated a buffer, it is one contiguous 
+   buffer and can be simply free()d when no longer needed; kiss_fft_free is an alias for free */
+#define kiss_fft_free free
+
+/*
+ Frees the work buffers that the library manages internally. Not necessary to call, but it might clean up 
+ the output of leak-checking tools to call this before you exit.
+*/
+void kiss_fft_cleanup(void);
+	
+
+/*
+ * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5); useful for choosing a transform length
+ */
+int kiss_fft_next_fast_size(int n);
+
+#ifdef __cplusplus
+} 
+#endif
+
+#endif
--- a/src/kissfft/kiss_fftr.c
+++ b/src/kissfft/kiss_fftr.c
@@ -0,0 +1,159 @@
+/*
+Copyright (c) 2003-2004, Mark Borgerding
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "kiss_fftr.h"
+#include "_kiss_fft_guts.h"
+
+struct kiss_fftr_state{ /* state for the real-input transforms; everything below lives in one allocation made by kiss_fftr_alloc */
+    kiss_fft_cfg substate; /* complex FFT of half the real length */
+    kiss_fft_cpx * tmpbuf; /* half-length complex work buffer */
+    kiss_fft_cpx * super_twiddles; /* extra twiddles used by kiss_fftr and kiss_fftri */
+#ifdef USE_SIMD    
+    long pad; /* present only in SIMD builds; presumably keeps what follows aligned -- TODO confirm */
+#endif    
+};
+/* Build the state for a real transform of nfft points (nfft must be even). One block holds, in order: the kiss_fftr_state header, the half-length complex FFT state, tmpbuf and super_twiddles. mem and lenmem follow the kiss_fft_alloc convention. */
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
+{
+    int i;
+    kiss_fftr_cfg st = NULL;
+    size_t subsize, memneeded;
+
+    if (nfft & 1) {
+        fprintf(stderr,"Real FFT optimization must be even.\n");
+        return NULL;
+    }
+    nfft >>= 1; /* from here on nfft is the length of the complex sub-transform */
+    /* size query only: with mem == NULL the call returns NULL but stores the required size in subsize */
+    kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
+    memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2);
+
+    if (lenmem == NULL) {
+        st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
+    } else {
+        if (*lenmem >= memneeded)
+            st = (kiss_fftr_cfg) mem; /* a NULL mem leaves st NULL, so NULL is returned below */
+        *lenmem = memneeded;
+    }
+    if (!st)
+        return NULL;
+
+    st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
+    st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); /* right after the sub-FFT state */
+    st->super_twiddles = st->tmpbuf + nfft; /* right after the nfft entries of tmpbuf */
+    kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); /* initialise the sub-FFT in place; its return value is not checked */
+    /* super twiddle i has phase -pi*(i/nfft + 1/2), negated for the inverse transform */
+    for (i = 0; i < nfft; ++i) {
+        double phase =
+            -3.14159265358979323846264338327 * ((double) i / nfft + .5);
+        if (inverse_fft)
+            phase *= -1;
+        kf_cexp (st->super_twiddles+i,phase);
+    }
+    return st;
+}
+/* Forward transform of real input: runs the half-length complex FFT on timedata viewed as complex pairs, then writes bins 0 .. ncfft of the spectrum to freqdata (so freqdata needs ncfft+1 entries). */
+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
+{
+    /* input buffer timedata is stored row-wise */
+    int k,ncfft;
+    kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
+    /* a cfg allocated for the inverse transform is a usage error: report it and terminate the process */
+    if ( st->substate->inverse) {
+        fprintf(stderr,"kiss fft usage error: improper alloc\n");
+        exit(1);
+    }
+
+    ncfft = st->substate->nfft; /* length of the complex sub-transform */
+
+    /*perform the parallel fft of two real signals packed in real,imag*/
+    kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
+    /* The real part of the DC element of the frequency spectrum in st->tmpbuf
+     * contains the sum of the even-numbered elements of the input time sequence
+     * The imag part is the sum of the odd-numbered elements
+     *
+     * The sum of tdc.r and tdc.i is the sum of the input time sequence. 
+     *      yielding DC of input time sequence
+     * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... 
+     *      yielding Nyquist bin of input time sequence
+     */
+ 
+    tdc.r = st->tmpbuf[0].r;
+    tdc.i = st->tmpbuf[0].i;
+    C_FIXDIV(tdc,2);
+    CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
+    CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
+    freqdata[0].r = tdc.r + tdc.i;
+    freqdata[ncfft].r = tdc.r - tdc.i;
+#ifdef USE_SIMD    
+    freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
+#else
+    freqdata[ncfft].i = freqdata[0].i = 0;
+#endif
+    /* each pass fills bin k and its mirror ncfft-k from tmpbuf[k] and the conjugate of tmpbuf[ncfft-k] */
+    for ( k=1;k <= ncfft/2 ; ++k ) {
+        fpk    = st->tmpbuf[k]; 
+        fpnk.r =   st->tmpbuf[ncfft-k].r;
+        fpnk.i = - st->tmpbuf[ncfft-k].i;
+        C_FIXDIV(fpk,2);
+        C_FIXDIV(fpnk,2);
+        /* f1k = sum, f2k = difference, tw = difference times super twiddle k */
+        C_ADD( f1k, fpk , fpnk );
+        C_SUB( f2k, fpk , fpnk );
+        C_MUL( tw , f2k , st->super_twiddles[k]);
+
+        freqdata[k].r = HALF_OF(f1k.r + tw.r);
+        freqdata[k].i = HALF_OF(f1k.i + tw.i);
+        freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
+        freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
+    }
+}
+/* Inverse counterpart of kiss_fftr: rebuilds the packed half-length complex spectrum in tmpbuf from bins 0 .. ncfft of freqdata, then runs the complex FFT with timedata viewed as complex pairs. */
+void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
+{
+    /* timedata is the output here; it receives ncfft complex values, i.e. 2*ncfft scalars */
+    int k, ncfft;
+    /* a cfg allocated for the forward transform is a usage error: report it and terminate the process */
+    if (st->substate->inverse == 0) {
+        fprintf (stderr, "kiss fft usage error: improper alloc\n");
+        exit (1);
+    }
+
+    ncfft = st->substate->nfft;
+    /* bin 0 of the packed spectrum combines the real parts of bins 0 and ncfft of freqdata */
+    st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
+    st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
+    C_FIXDIV(st->tmpbuf[0],2);
+
+    for (k = 1; k <= ncfft / 2; ++k) {
+        kiss_fft_cpx fk, fnkc, fek, fok, tmp;
+        fk = freqdata[k];
+        fnkc.r = freqdata[ncfft - k].r; /* fnkc is the conjugate of bin ncfft-k */
+        fnkc.i = -freqdata[ncfft - k].i;
+        C_FIXDIV( fk , 2 );
+        C_FIXDIV( fnkc , 2 );
+        /* fek = sum, fok = difference times super twiddle k */
+        C_ADD (fek, fk, fnkc);
+        C_SUB (tmp, fk, fnkc);
+        C_MUL (fok, tmp, st->super_twiddles[k]);
+        C_ADD (st->tmpbuf[k],     fek, fok);
+        C_SUB (st->tmpbuf[ncfft - k], fek, fok);
+#ifdef USE_SIMD        
+        st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
+#else
+        st->tmpbuf[ncfft - k].i *= -1; /* conjugate the mirrored entry */
+#endif
+    }
+    kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
+}
--- a/src/kissfft/kiss_fftr.h
+++ b/src/kissfft/kiss_fftr.h
@@ -0,0 +1,46 @@
+#ifndef KISS_FTR_H
+#define KISS_FTR_H
+
+#include "kiss_fft.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+    
+/* 
+ 
+ Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
+
+ 
+ 
+ */
+/* Opaque handle to the state of a real-input transform, created by kiss_fftr_alloc. */
+typedef struct kiss_fftr_state *kiss_fftr_cfg;
+
+
+kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
+/*
+ nfft must be even; for an odd nfft a message is printed to stderr and NULL is returned.
+ inverse_fft selects the transform the cfg is valid for: zero for kiss_fftr, nonzero for kiss_fftri.
+ To let the library allocate the state, use mem = lenmem = NULL and release it with kiss_fftr_free.
+*/
+
+
+void kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
+/*
+ input timedata has nfft scalar points
+ output freqdata has nfft/2+1 complex points; cfg must have been allocated with inverse_fft == 0, otherwise the call prints an error and exits
+*/
+
+void kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
+/*
+ input freqdata has  nfft/2+1 complex points
+ output timedata has nfft scalar points; cfg must have been allocated with a nonzero inverse_fft, otherwise the call prints an error and exits
+*/
+/* kiss_fftr_free is an alias for free. */
+#define kiss_fftr_free free
+
+#ifdef __cplusplus
+}
+#endif
+#endif
--- a/src/pommier/neon_mathfun.h
+++ b/src/pommier/neon_mathfun.h
@@ -0,0 +1,301 @@
+/* NEON implementation of sin, cos, exp and log
+
+   Inspired by Intel Approximate Math library, and based on the
+   corresponding algorithms of the cephes math library
+*/
+
+/* Copyright (C) 2011  Julien Pommier
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  (this is the zlib license)
+*/
+
+#include <arm_neon.h>
+
+typedef float32x4_t v4sf;  // vector of 4 float
+typedef uint32x4_t v4su;  // vector of 4 uint32
+typedef int32x4_t v4si;  // vector of 4 int32 (signed)
+/* Constants for log_ps: a bit mask, the range-reduction threshold, polynomial coefficients p0..p8, and q1/q2 which together add up to ln 2. */
+#define c_inv_mant_mask ~0x7f800000u /* clears the eight exponent bits, keeps sign and mantissa */
+#define c_cephes_SQRTHF 0.707106781186547524
+#define c_cephes_log_p0 7.0376836292E-2
+#define c_cephes_log_p1 - 1.1514610310E-1
+#define c_cephes_log_p2 1.1676998740E-1
+#define c_cephes_log_p3 - 1.2420140846E-1
+#define c_cephes_log_p4 + 1.4249322787E-1
+#define c_cephes_log_p5 - 1.6668057665E-1
+#define c_cephes_log_p6 + 2.0000714765E-1
+#define c_cephes_log_p7 - 2.4999993993E-1
+#define c_cephes_log_p8 + 3.3333331174E-1
+#define c_cephes_log_q1 -2.12194440e-4
+#define c_cephes_log_q2 0.693359375
+
+/* natural logarithm of each of the 4 float lanes of x;
+   lanes with x <= 0 come back with every bit set (a NaN pattern)
+*/
+v4sf log_ps(v4sf x) {
+  v4sf one = vdupq_n_f32(1);
+
+  x = vmaxq_f32(x, vdupq_n_f32(0)); /* force flush to zero on denormal values */
+  v4su invalid_mask = vcleq_f32(x, vdupq_n_f32(0));
+  /* from here on the float bits are handled as integers */
+  v4si ux = vreinterpretq_s32_f32(x);
+  
+  v4si emm0 = vshrq_n_s32(ux, 23);
+
+  /* keep only the fractional part */
+  ux = vandq_s32(ux, vdupq_n_s32(c_inv_mant_mask));
+  ux = vorrq_s32(ux, vreinterpretq_s32_f32(vdupq_n_f32(0.5f)));
+  x = vreinterpretq_f32_s32(ux);
+  /* remove the exponent bias 0x7f and convert the exponent to float */
+  emm0 = vsubq_s32(emm0, vdupq_n_s32(0x7f));
+  v4sf e = vcvtq_f32_s32(emm0);
+
+  e = vaddq_f32(e, one);
+
+  /* part2: 
+     if( x < SQRTHF ) {
+       e -= 1;
+       x = x + x - 1.0;
+     } else { x = x - 1.0; }
+  */
+  v4su mask = vcltq_f32(x, vdupq_n_f32(c_cephes_SQRTHF));
+  v4sf tmp = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(x), mask));
+  x = vsubq_f32(x, one);
+  e = vsubq_f32(e, vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(one), mask)));
+  x = vaddq_f32(x, tmp);
+
+  v4sf z = vmulq_f32(x,x);
+  /* evaluate the polynomial in x with coefficients p0..p8, one multiply and add per step */
+  v4sf y = vdupq_n_f32(c_cephes_log_p0);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p1));
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p2));
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p3));
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p4));
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p5));
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p6));
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p7));
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p8));
+  y = vmulq_f32(y, x);
+
+  y = vmulq_f32(y, z);
+  
+  /* add the exponent contribution in two parts, e*q1 here and e*q2 below */
+  tmp = vmulq_f32(e, vdupq_n_f32(c_cephes_log_q1));
+  y = vaddq_f32(y, tmp);
+
+
+  tmp = vmulq_f32(z, vdupq_n_f32(0.5f));
+  y = vsubq_f32(y, tmp);
+
+  tmp = vmulq_f32(e, vdupq_n_f32(c_cephes_log_q2));
+  x = vaddq_f32(x, y);
+  x = vaddq_f32(x, tmp);
+  x = vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(x), invalid_mask)); // negative arg will be NAN
+  return x;
+}
+/* Constants for exp_ps: input clamp bounds, log2(e), the two-part split of ln 2 (C1 + C2), and polynomial coefficients p0..p5. */
+#define c_exp_hi 88.3762626647949f
+#define c_exp_lo -88.3762626647949f
+
+#define c_cephes_LOG2EF 1.44269504088896341
+#define c_cephes_exp_C1 0.693359375
+#define c_cephes_exp_C2 -2.12194440e-4
+
+#define c_cephes_exp_p0 1.9875691500E-4
+#define c_cephes_exp_p1 1.3981999507E-3
+#define c_cephes_exp_p2 8.3334519073E-3
+#define c_cephes_exp_p3 4.1665795894E-2
+#define c_cephes_exp_p4 1.6666665459E-1
+#define c_cephes_exp_p5 5.0000001201E-1
+
+/* exp() computed for 4 float at once; the input is first clamped to [c_exp_lo, c_exp_hi] */
+v4sf exp_ps(v4sf x) {
+  v4sf tmp, fx;
+
+  v4sf one = vdupq_n_f32(1);
+  x = vminq_f32(x, vdupq_n_f32(c_exp_hi));
+  x = vmaxq_f32(x, vdupq_n_f32(c_exp_lo));
+
+  /* express exp(x) as exp(g + n*log(2)) */
+  fx = vmlaq_f32(vdupq_n_f32(0.5f), x, vdupq_n_f32(c_cephes_LOG2EF));
+
+  /* perform a floorf */
+  tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
+
+  /* if greater, substract 1 */
+  v4su mask = vcgtq_f32(tmp, fx);    
+  mask = vandq_u32(mask, vreinterpretq_u32_f32(one));
+
+
+  fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
+  /* subtract fx*C1 and then fx*C2 from x, leaving the reduced argument */
+  tmp = vmulq_f32(fx, vdupq_n_f32(c_cephes_exp_C1));
+  v4sf z = vmulq_f32(fx, vdupq_n_f32(c_cephes_exp_C2));
+  x = vsubq_f32(x, tmp);
+  x = vsubq_f32(x, z);
+
+  static const float32_t cephes_exp_p[6] = { c_cephes_exp_p0, c_cephes_exp_p1, c_cephes_exp_p2, c_cephes_exp_p3, c_cephes_exp_p4, c_cephes_exp_p5 };
+  v4sf y = vld1q_dup_f32(cephes_exp_p+0);
+  v4sf c1 = vld1q_dup_f32(cephes_exp_p+1); 
+  v4sf c2 = vld1q_dup_f32(cephes_exp_p+2); 
+  v4sf c3 = vld1q_dup_f32(cephes_exp_p+3); 
+  v4sf c4 = vld1q_dup_f32(cephes_exp_p+4); 
+  v4sf c5 = vld1q_dup_f32(cephes_exp_p+5);
+  /* polynomial in the reduced x, one multiply and add per coefficient */
+  y = vmulq_f32(y, x);
+  z = vmulq_f32(x,x);
+  y = vaddq_f32(y, c1);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, c2);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, c3);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, c4);
+  y = vmulq_f32(y, x);
+  y = vaddq_f32(y, c5);
+  
+  y = vmulq_f32(y, z);
+  y = vaddq_f32(y, x);
+  y = vaddq_f32(y, one);
+
+  /* build 2^n */
+  int32x4_t mm;
+  mm = vcvtq_s32_f32(fx);
+  mm = vaddq_s32(mm, vdupq_n_s32(0x7f)); /* add the exponent bias */
+  mm = vshlq_n_s32(mm, 23);              /* move it into the exponent field */
+  v4sf pow2n = vreinterpretq_f32_s32(mm);
+
+  y = vmulq_f32(y, pow2n);
+  return y;
+}
+/* Constants for sincos_ps: the negated three-part split of Pi/4 (DP1 + DP2 + DP3), the sine and cosine polynomial coefficients, and 4/Pi. */
+#define c_minus_cephes_DP1 -0.78515625
+#define c_minus_cephes_DP2 -2.4187564849853515625e-4
+#define c_minus_cephes_DP3 -3.77489497744594108e-8
+#define c_sincof_p0 -1.9515295891E-4
+#define c_sincof_p1  8.3321608736E-3
+#define c_sincof_p2 -1.6666654611E-1
+#define c_coscof_p0  2.443315711809948E-005
+#define c_coscof_p1 -1.388731625493765E-003
+#define c_coscof_p2  4.166664568298827E-002
+#define c_cephes_FOPI 1.27323954473516 // 4 / M_PI
+
+/* evaluation of 4 sines & cosines at once.
+
+   The code is the exact rewriting of the cephes sinf function.
+   Precision is excellent as long as x < 8192 (I did not bother to
+   take into account the special handling they have for greater values
+   -- it does not return garbage for arguments over 8192, though, but
+   the extra precision is missing).
+
+   Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
+   surprising but correct result.
+
+   Note also that when you compute sin(x), cos(x) is available at
+   almost no extra price so both sin_ps and cos_ps make use of
+   sincos_ps. The results are stored through ysin and ycos.
+  */
+void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x
+  v4sf xmm1, xmm2, xmm3, y;
+
+  v4su emm2;
+  /* remember which lanes were negative, then continue with the absolute value */
+  v4su sign_mask_sin, sign_mask_cos;
+  sign_mask_sin = vcltq_f32(x, vdupq_n_f32(0));
+  x = vabsq_f32(x);
+
+  /* scale by 4/Pi */
+  y = vmulq_f32(x, vdupq_n_f32(c_cephes_FOPI));
+
+  /* store the integer part of y in emm2 */
+  emm2 = vcvtq_u32_f32(y);
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  emm2 = vaddq_u32(emm2, vdupq_n_u32(1));
+  emm2 = vandq_u32(emm2, vdupq_n_u32(~1));
+  y = vcvtq_f32_u32(emm2);
+
+  /* get the polynom selection mask 
+     there is one polynom for 0 <= x <= Pi/4
+     and another one for Pi/4<x<=Pi/2
+
+     Both branches will be computed.
+  */
+  v4su poly_mask = vtstq_u32(emm2, vdupq_n_u32(2));
+  
+  /* The magic pass: "Extended precision modular arithmetic" 
+     x = ((x - y * DP1) - y * DP2) - y * DP3; */
+  xmm1 = vmulq_n_f32(y, c_minus_cephes_DP1);
+  xmm2 = vmulq_n_f32(y, c_minus_cephes_DP2);
+  xmm3 = vmulq_n_f32(y, c_minus_cephes_DP3);
+  x = vaddq_f32(x, xmm1);
+  x = vaddq_f32(x, xmm2);
+  x = vaddq_f32(x, xmm3);
+  /* sign masks: bit 2 of emm2 flips the sine sign; bit 2 of (emm2 - 2) selects the cosine sign */
+  sign_mask_sin = veorq_u32(sign_mask_sin, vtstq_u32(emm2, vdupq_n_u32(4)));
+  sign_mask_cos = vtstq_u32(vsubq_u32(emm2, vdupq_n_u32(2)), vdupq_n_u32(4));
+
+  /* Evaluate both polynoms on the reduced x: y1 uses the coscof
+     coefficients and y2 uses the sincof coefficients */
+  v4sf z = vmulq_f32(x,x);
+  v4sf y1, y2;
+
+  y1 = vmulq_n_f32(z, c_coscof_p0);
+  y2 = vmulq_n_f32(z, c_sincof_p0);
+  y1 = vaddq_f32(y1, vdupq_n_f32(c_coscof_p1));
+  y2 = vaddq_f32(y2, vdupq_n_f32(c_sincof_p1));
+  y1 = vmulq_f32(y1, z);
+  y2 = vmulq_f32(y2, z);
+  y1 = vaddq_f32(y1, vdupq_n_f32(c_coscof_p2));
+  y2 = vaddq_f32(y2, vdupq_n_f32(c_sincof_p2));
+  y1 = vmulq_f32(y1, z);
+  y2 = vmulq_f32(y2, z);
+  y1 = vmulq_f32(y1, z);
+  y2 = vmulq_f32(y2, x);
+  y1 = vsubq_f32(y1, vmulq_f32(z, vdupq_n_f32(0.5f)));
+  y2 = vaddq_f32(y2, x);
+  y1 = vaddq_f32(y1, vdupq_n_f32(1));
+
+  /* select the correct result from the two polynoms */  
+  v4sf ys = vbslq_f32(poly_mask, y1, y2);
+  v4sf yc = vbslq_f32(poly_mask, y2, y1);
+  *ysin = vbslq_f32(sign_mask_sin, vnegq_f32(ys), ys);
+  *ycos = vbslq_f32(sign_mask_cos, yc, vnegq_f32(yc));
+}
+/* Sine of each of the four lanes of x: calls sincos_ps and keeps only the sine. */
+v4sf sin_ps(v4sf x) {
+  v4sf sine, cosine;
+  sincos_ps(x, &sine, &cosine);
+  return sine;
+}
+/* Cosine of each of the four lanes of x: calls sincos_ps and keeps only the cosine. */
+v4sf cos_ps(v4sf x) {
+  v4sf sine, cosine;
+  sincos_ps(x, &sine, &cosine);
+  return cosine;
+}
+
+
--- a/src/pommier/sse_mathfun.h
+++ b/src/pommier/sse_mathfun.h
@@ -0,0 +1,766 @@
+
+#ifndef _POMMIER_SSE_MATHFUN_H_
+#define _POMMIER_SSE_MATHFUN_H_
+
+/* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
+
+   Inspired by Intel Approximate Math library, and based on the
+   corresponding algorithms of the cephes math library
+
+   The default is to use the SSE1 version. If you define USE_SSE2,
+   the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
+   not expect any significant performance improvement with SSE2.
+*/
+
+/* Copyright (C) 2007  Julien Pommier
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  (this is the zlib license)
+*/
+
+#include <xmmintrin.h>
+
+/* yes I know, the top of this file is quite ugly.
+
+   ALIGN16_BEG / ALIGN16_END are written before and after a declarator
+   to request 16-byte alignment. Visual C++ takes the request in front
+   (__declspec), gcc/icc take it behind (__attribute__), so on each
+   compiler one of the two macros expands to nothing. */
+
+#ifdef _MSC_VER /* visual c++ */
+# define ALIGN16_BEG __declspec(align(16))
+# define ALIGN16_END 
+#else /* gcc or icc */
+# define ALIGN16_BEG
+# define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+/* __m128 is ugly to write, so the vector types get short aliases */
+typedef __m128 v4sf;  // vector of 4 float (sse1)
+
+#ifdef USE_SSE2
+# include <emmintrin.h>
+typedef __m128i v4si; // vector of 4 int (sse2)
+#else /* no SSE2: integer work is done on __m64 values, two ints at a time */
+typedef __m64 v2si;   // vector of 2 int (mmx)
+#endif
+
+/* declare some SSE constants -- why can't I figure a better way to do that?
+
+   Each macro defines a static const, 16-byte aligned array holding four
+   copies of Val, so that the functions below can load it as one vector
+   (they read it through a cast such as *(v4sf*)_ps_1):
+     _PS_CONST(Name, Val)             float array named _ps_##Name
+     _PI32_CONST(Name, Val)           int array named _pi32_##Name
+     _PS_CONST_TYPE(Name, Type, Val)  Type array named _ps_##Name, used
+                                      below for raw bit patterns */
+#define _PS_CONST(Name, Val)                                            \
+  static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
+#define _PI32_CONST(Name, Val)                                            \
+  static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
+#define _PS_CONST_TYPE(Name, Type, Val)                                 \
+  static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
+
+/* Generic lane constants shared by the functions below. */
+_PS_CONST(1  , 1.0f);
+_PS_CONST(0p5, 0.5f);
+/* the smallest non denormalized float number */
+_PS_CONST_TYPE(min_norm_pos, int, 0x00800000);
+/* 0x7f800000 covers bits 23..30 of a 32-bit float, i.e. the exponent
+   field (despite the "mant" in the name); the inverted mask keeps the
+   sign and fraction bits and clears the exponent. */
+_PS_CONST_TYPE(mant_mask, int, 0x7f800000);
+_PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000);
+
+/* 0x80000000 does not fit in an int, so the literal has an unsigned
+   type. The explicit cast keeps the same bit pattern while avoiding a
+   narrowing conversion inside the brace initializer, which C++11 and
+   later reject. ~0x80000000 is 0x7fffffff and fits as it is. */
+_PS_CONST_TYPE(sign_mask, int, (int)0x80000000);
+_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
+
+/* small integer constants used for the quadrant and exponent arithmetic */
+_PI32_CONST(1, 1);
+_PI32_CONST(inv1, ~1);
+_PI32_CONST(2, 2);
+_PI32_CONST(4, 4);
+_PI32_CONST(0x7f, 0x7f);
+
+_PS_CONST(cephes_SQRTHF, 0.707106781186547524); /* sqrt(1/2): threshold compared against in part 2 of log_ps */
+_PS_CONST(cephes_log_p0, 7.0376836292E-2); /* log_p0..log_p8: polynomial coefficients, consumed in this order by log_ps */
+_PS_CONST(cephes_log_p1, - 1.1514610310E-1);
+_PS_CONST(cephes_log_p2, 1.1676998740E-1);
+_PS_CONST(cephes_log_p3, - 1.2420140846E-1);
+_PS_CONST(cephes_log_p4, + 1.4249322787E-1);
+_PS_CONST(cephes_log_p5, - 1.6668057665E-1);
+_PS_CONST(cephes_log_p6, + 2.0000714765E-1);
+_PS_CONST(cephes_log_p7, - 2.4999993993E-1);
+_PS_CONST(cephes_log_p8, + 3.3333331174E-1);
+_PS_CONST(cephes_log_q1, -2.12194440e-4); /* q1 + q2 is approximately ln(2); log_ps multiplies each part by the exponent */
+_PS_CONST(cephes_log_q2, 0.693359375);
+
+#if defined (__MINGW32__)
+
+/* The ugly part below: many versions of gcc used to be completely buggy with respect to some intrinsics.
+   The movehl_ps is fixed in mingw 3.4.5, but I found out that all the _mm_cmp* intrinsics were completely
+   broken on my mingw gcc 3.4.5 ...
+
+   Note that the bug on _mm_cmp* does occur only at -O0 optimization level.
+
+   Workaround: each helper below issues one SSE instruction through
+   inline asm. Operand %0 is `a`, used both as input (matching
+   constraint "0") and as output ("=x"); operand %2 is `b`. The
+   #defines then redirect the intrinsic names to these helpers for the
+   code that follows, and the #warning lines report the substitution
+   at compile time.
+
+   NOTE(review): my_cmpgt_ps uses cmpnleps ("not less-or-equal"), which
+   presumably differs from a strict greater-than when an operand is NaN
+   -- confirm this is acceptable for the callers.
+*/
+
+inline __m128 my_movehl_ps(__m128 a, const __m128 b) { /* stands in for _mm_movehl_ps */
+	asm (
+			"movhlps %2,%0\n\t"
+			: "=x" (a)
+			: "0" (a), "x"(b)
+	    );
+	return a;                                 }
+#warning "redefined _mm_movehl_ps (see gcc bug 21179)"
+#define _mm_movehl_ps my_movehl_ps
+
+inline __m128 my_cmplt_ps(__m128 a, const __m128 b) { /* stands in for _mm_cmplt_ps */
+	asm (
+			"cmpltps %2,%0\n\t"
+			: "=x" (a)
+			: "0" (a), "x"(b)
+	    );
+	return a;               
+                  }
+inline __m128 my_cmpgt_ps(__m128 a, const __m128 b) { /* stands in for _mm_cmpgt_ps, see the note above */
+	asm (
+			"cmpnleps %2,%0\n\t"
+			: "=x" (a)
+			: "0" (a), "x"(b)
+	    );
+	return a;               
+}
+inline __m128 my_cmpeq_ps(__m128 a, const __m128 b) { /* stands in for _mm_cmpeq_ps */
+	asm (
+			"cmpeqps %2,%0\n\t"
+			: "=x" (a)
+			: "0" (a), "x"(b)
+	    );
+	return a;               
+}
+#warning "redefined _mm_cmpxx_ps functions..."
+#define _mm_cmplt_ps my_cmplt_ps
+#define _mm_cmpgt_ps my_cmpgt_ps
+#define _mm_cmpeq_ps my_cmpeq_ps
+#endif
+
+#ifndef USE_SSE2 /* only needed when the integer work is done with __m64 values */
+typedef union xmm_mm_union { /* the same storage viewed as one __m128 or as two __m64 */
+  __m128 xmm;
+  __m64 mm[2];
+} xmm_mm_union;
+/* COPY_XMM_TO_MM: store xmm_ in the union, then read it back as mm0_ (mm[0]) and mm1_ (mm[1]) */
+#define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) {          \
+    xmm_mm_union u; u.xmm = xmm_;                   \
+    mm0_ = u.mm[0];                                 \
+    mm1_ = u.mm[1];                                 \
+}
+/* COPY_MM_TO_XMM: the reverse, mm0_ and mm1_ go into mm[0] and mm[1] and are read back as xmm_ */
+#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) {                         \
+    xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm;      \
+  }
+
+#endif // USE_SSE2
+
+/* Natural logarithm computed for 4 floats at once.
+
+   x: the four input values.
+   Returns the logarithm of each lane. Lanes with x <= 0 are OR-ed with
+   an all-ones compare mask just before returning, so they come back as
+   NaN. The input is first clamped from below to the smallest
+   normalized float, so denormals get no special treatment.
+
+   Outline: split x into an exponent e and a mantissa in [0.5, 1)
+   (frexpf style), fold the mantissa around 1 (part 2), evaluate the
+   cephes_log_p* polynomial on the result and add e times ln(2), with
+   ln(2) supplied in two parts (cephes_log_q1 and cephes_log_q2).
+*/
+v4sf log_ps(v4sf x) {
+#ifdef USE_SSE2
+  v4si emm0;
+#else
+  v2si mm0, mm1;
+#endif
+  v4sf one = *(v4sf*)_ps_1;
+
+  v4sf invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps()); /* all-ones in the lanes where x <= 0 */
+
+  x = _mm_max_ps(x, *(v4sf*)_ps_min_norm_pos);  /* cut off denormalized stuff */
+
+#ifndef USE_SSE2
+  /* part 1: x = frexpf(x, &e);
+     shifting the float bits right by 23 leaves the biased exponent */
+  COPY_XMM_TO_MM(x, mm0, mm1);
+  mm0 = _mm_srli_pi32(mm0, 23);
+  mm1 = _mm_srli_pi32(mm1, 23);
+#else
+  emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23); /* same shift, done on all four lanes at once */
+#endif
+  /* keep only the fractional part: clear the exponent bits, then OR in
+     the bit pattern of 0.5 so that x ends up in [0.5, 1) */
+  x = _mm_and_ps(x, *(v4sf*)_ps_inv_mant_mask);
+  x = _mm_or_ps(x, *(v4sf*)_ps_0p5);
+
+#ifndef USE_SSE2
+  /* now e=mm0:mm1 contain the really base-2 exponent
+     (the bias 0x7f is subtracted, then the ints are converted to float) */
+  mm0 = _mm_sub_pi32(mm0, *(v2si*)_pi32_0x7f);
+  mm1 = _mm_sub_pi32(mm1, *(v2si*)_pi32_0x7f);
+  v4sf e = _mm_cvtpi32x2_ps(mm0, mm1);
+  _mm_empty(); /* bye bye mmx */
+#else
+  emm0 = _mm_sub_epi32(emm0, *(v4si*)_pi32_0x7f);
+  v4sf e = _mm_cvtepi32_ps(emm0);
+#endif
+
+  e = _mm_add_ps(e, one); /* +1 because the mantissa was normalized to [0.5, 1) */
+
+  /* part2: 
+     if( x < SQRTHF ) {
+       e -= 1;
+       x = x + x - 1.0;
+     } else { x = x - 1.0; }
+     done without branching: mask selects the lanes of the first case
+  */
+  v4sf mask = _mm_cmplt_ps(x, *(v4sf*)_ps_cephes_SQRTHF);
+  v4sf tmp = _mm_and_ps(x, mask); /* x where x < SQRTHF, 0 elsewhere */
+  x = _mm_sub_ps(x, one);
+  e = _mm_sub_ps(e, _mm_and_ps(one, mask));
+  x = _mm_add_ps(x, tmp);
+
+
+  v4sf z = _mm_mul_ps(x,x);
+
+  v4sf y = *(v4sf*)_ps_cephes_log_p0; /* polynomial in x, one multiply-add per coefficient p0..p8 */
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p1);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p2);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p3);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p4);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p5);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p6);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p7);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p8);
+  y = _mm_mul_ps(y, x);
+
+  y = _mm_mul_ps(y, z); /* the polynomial is scaled by x*x*x overall */
+  
+
+  tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q1); /* small part of e*ln(2) */
+  y = _mm_add_ps(y, tmp);
+
+
+  tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5); /* y -= x*x/2 */
+  y = _mm_sub_ps(y, tmp);
+
+  tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q2); /* large part of e*ln(2), added last */
+  x = _mm_add_ps(x, y);
+  x = _mm_add_ps(x, tmp);
+  x = _mm_or_ps(x, invalid_mask); // lanes with x <= 0 (zero or negative arg) will be NAN
+  return x;
+}
+
+_PS_CONST(exp_hi,	88.3762626647949f); /* exp_hi / exp_lo: bounds the argument of exp_ps is clamped to */
+_PS_CONST(exp_lo,	-88.3762626647949f);
+
+_PS_CONST(cephes_LOG2EF, 1.44269504088896341); /* log2(e) */
+_PS_CONST(cephes_exp_C1, 0.693359375); /* C1 + C2 is approximately ln(2), kept in two parts */
+_PS_CONST(cephes_exp_C2, -2.12194440e-4);
+
+_PS_CONST(cephes_exp_p0, 1.9875691500E-4); /* exp_p0..exp_p5: polynomial coefficients, consumed in this order */
+_PS_CONST(cephes_exp_p1, 1.3981999507E-3);
+_PS_CONST(cephes_exp_p2, 8.3334519073E-3);
+_PS_CONST(cephes_exp_p3, 4.1665795894E-2);
+_PS_CONST(cephes_exp_p4, 1.6666665459E-1);
+_PS_CONST(cephes_exp_p5, 5.0000001201E-1);
+/* Exponential computed for 4 floats at once.
+
+   x: the four input values; each lane is clamped to [exp_lo, exp_hi]
+   before anything else.
+   Returns the exponential of each (clamped) lane.
+
+   Outline: n = floor(x * log2(e) + 0.5); the remainder g = x - n*ln(2)
+   is obtained by subtracting n*C1 and n*C2 in turn; a polynomial in g
+   is evaluated and the result is multiplied by 2^n, where 2^n is built
+   directly by writing n + 0x7f into the exponent field of a float.
+*/
+v4sf exp_ps(v4sf x) {
+  v4sf tmp = _mm_setzero_ps(), fx;
+#ifdef USE_SSE2
+  v4si emm0;
+#else
+  v2si mm0, mm1;
+#endif
+  v4sf one = *(v4sf*)_ps_1;
+
+  x = _mm_min_ps(x, *(v4sf*)_ps_exp_hi);
+  x = _mm_max_ps(x, *(v4sf*)_ps_exp_lo);
+
+  /* express exp(x) as exp(g + n*log(2)) */
+  fx = _mm_mul_ps(x, *(v4sf*)_ps_cephes_LOG2EF);
+  fx = _mm_add_ps(fx, *(v4sf*)_ps_0p5);
+
+  /* how to perform a floorf with SSE: just below */
+#ifndef USE_SSE2
+  /* step 1 : cast to int (truncation; the upper two lanes go through tmp) */
+  tmp = _mm_movehl_ps(tmp, fx);
+  mm0 = _mm_cvttps_pi32(fx);
+  mm1 = _mm_cvttps_pi32(tmp);
+  /* step 2 : cast back to float */
+  tmp = _mm_cvtpi32x2_ps(mm0, mm1);
+#else
+  emm0 = _mm_cvttps_epi32(fx); /* truncate to int ... */
+  tmp  = _mm_cvtepi32_ps(emm0); /* ... and convert back to float */
+#endif
+  /* if greater, substract 1 (truncation rounds negative values up) */
+  v4sf mask = _mm_cmpgt_ps(tmp, fx);    
+  mask = _mm_and_ps(mask, one); /* 1.0 in the lanes to correct, 0 elsewhere */
+  fx = _mm_sub_ps(tmp, mask);
+
+  tmp = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C1); /* x -= n*C1, then x -= n*C2 */
+  v4sf z = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C2);
+  x = _mm_sub_ps(x, tmp);
+  x = _mm_sub_ps(x, z);
+
+  z = _mm_mul_ps(x,x);
+  
+  v4sf y = *(v4sf*)_ps_cephes_exp_p0; /* polynomial in x, one multiply-add per coefficient p0..p5 */
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p1);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p2);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p3);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p4);
+  y = _mm_mul_ps(y, x);
+  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p5);
+  y = _mm_mul_ps(y, z); /* y = poly * x*x + x + 1 */
+  y = _mm_add_ps(y, x);
+  y = _mm_add_ps(y, one);
+
+  /* build 2^n: (n + 0x7f) shifted left by 23 lands in the exponent field */
+#ifndef USE_SSE2
+  z = _mm_movehl_ps(z, fx);
+  mm0 = _mm_cvttps_pi32(fx);
+  mm1 = _mm_cvttps_pi32(z);
+  mm0 = _mm_add_pi32(mm0, *(v2si*)_pi32_0x7f);
+  mm1 = _mm_add_pi32(mm1, *(v2si*)_pi32_0x7f);
+  mm0 = _mm_slli_pi32(mm0, 23); 
+  mm1 = _mm_slli_pi32(mm1, 23);
+  
+  v4sf pow2n; 
+  COPY_MM_TO_XMM(mm0, mm1, pow2n);
+  _mm_empty(); /* done with the __m64 values */
+#else
+  emm0 = _mm_cvttps_epi32(fx);
+  emm0 = _mm_add_epi32(emm0, *(v4si*)_pi32_0x7f);
+  emm0 = _mm_slli_epi32(emm0, 23);
+  v4sf pow2n = _mm_castsi128_ps(emm0);
+#endif
+  y = _mm_mul_ps(y, pow2n);
+  return y;
+}
+
+_PS_CONST(minus_cephes_DP1, -0.78515625); /* DP1 + DP2 + DP3 is approximately Pi/4, kept in three parts */
+_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
+_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
+_PS_CONST(sincof_p0, -1.9515295891E-4); /* sincof_p0..p2: coefficients of the polynomial that ends in "+ x" */
+_PS_CONST(sincof_p1,  8.3321608736E-3);
+_PS_CONST(sincof_p2, -1.6666654611E-1);
+_PS_CONST(coscof_p0,  2.443315711809948E-005); /* coscof_p0..p2: coefficients of the polynomial that ends in "- z/2 + 1" */
+_PS_CONST(coscof_p1, -1.388731625493765E-003);
+_PS_CONST(coscof_p2,  4.166664568298827E-002);
+_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
+
+
+/* evaluation of 4 sines at once, using only SSE1+MMX intrinsics so
+   it runs also on old Athlon XPs and the pentium III of your grand
+   mother.
+
+   x: the four input angles. Returns the sine of each lane.
+
+   The code is the exact rewriting of the cephes sinf function.
+   Precision is excellent as long as x < 8192 (I did not bother to
+   take into account the special handling they have for greater values
+   -- it does not return garbage for arguments over 8192, though, but
+   the extra precision is missing).
+
+   Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
+   surprising but correct result.
+
+   Performance is also surprisingly good, 1.33 times faster than the
+   macos vsinf SSE2 function, and 1.5 times faster than the
+   __vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
+   too bad for an SSE1 function (with no special tuning) !
+   However the latter libraries probably have a much better handling of NaN,
+   Inf, denormalized and other special arguments..
+
+   On my core 1 duo, the execution of this function takes approximately 95 cycles.
+
+   From what I have observed on the experiments with Intel AMath lib, switching to an
+   SSE2 version would improve the perf by only 10%.
+
+   Since it is based on SSE intrinsics, it has to be compiled at -O2 to
+   deliver full speed.
+
+   Outline: work on |x| and remember the sign; j = (int)(|x| * 4/Pi),
+   rounded up to an even number; bit 2 of j flips the sign and bit 1 of
+   j chooses between the two polynomials; |x| - j*Pi/4 is computed with
+   Pi/4 split in three parts; both polynomials are evaluated and the
+   result is picked per lane with a mask.
+*/
+v4sf sin_ps(v4sf x) { // any x
+  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
+
+#ifdef USE_SSE2
+  v4si emm0, emm2;
+#else
+  v2si mm0, mm1, mm2, mm3;
+#endif
+  sign_bit = x;
+  /* take the absolute value */
+  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
+  /* extract the sign bit (upper one) */
+  sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);
+  
+  /* scale by 4/Pi */
+  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
+
+  //printf("plop:"); print4(y); 
+#ifdef USE_SSE2
+  /* store the integer part of y in emm2 */
+  emm2 = _mm_cvttps_epi32(y);
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
+  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
+  y = _mm_cvtepi32_ps(emm2);
+  /* get the swap sign flag: bit 2 of j, shifted by 29 into the float sign position */
+  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  /* get the polynom selection mask 
+     there is one polynom for 0 <= x <= Pi/4
+     and another one for Pi/4<x<=Pi/2
+
+     Both branches will be computed.
+     The mask is all-ones in the lanes where bit 1 of j is clear.
+  */
+  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+  
+  v4sf swap_sign_bit = _mm_castsi128_ps(emm0);
+  v4sf poly_mask = _mm_castsi128_ps(emm2);
+  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+#else
+  /* store the integer part of y in mm2:mm3 (the upper two lanes go through xmm2) */
+  xmm2 = _mm_movehl_ps(xmm2, y);
+  mm2 = _mm_cvttps_pi32(y);
+  mm3 = _mm_cvttps_pi32(xmm2);
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
+  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
+  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
+  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
+  y = _mm_cvtpi32x2_ps(mm2, mm3);
+  /* get the swap sign flag: bit 2 of j, shifted by 29 into the float sign position */
+  mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
+  mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
+  mm0 = _mm_slli_pi32(mm0, 29);
+  mm1 = _mm_slli_pi32(mm1, 29);
+  /* get the polynom selection mask: all-ones where bit 1 of j is clear */
+  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
+  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
+  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
+  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
+  v4sf swap_sign_bit, poly_mask;
+  COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit);
+  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
+  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
+  _mm_empty(); /* good-bye mmx */
+#endif
+  
+  /* The magic pass: "Extended precision modular arithmetic" 
+     x = ((x - y * DP1) - y * DP2) - y * DP3;
+     the constants are stored negated, hence the additions below */
+  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
+  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
+  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
+  xmm1 = _mm_mul_ps(y, xmm1);
+  xmm2 = _mm_mul_ps(y, xmm2);
+  xmm3 = _mm_mul_ps(y, xmm3);
+  x = _mm_add_ps(x, xmm1);
+  x = _mm_add_ps(x, xmm2);
+  x = _mm_add_ps(x, xmm3);
+
+  /* Evaluate the first polynom (coscof_* coefficients), with z = x*x:
+     y = ((p0*z + p1)*z + p2)*z*z - z/2 + 1 */
+  y = *(v4sf*)_ps_coscof_p0;
+  v4sf z = _mm_mul_ps(x,x);
+
+  y = _mm_mul_ps(y, z);
+  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
+  y = _mm_mul_ps(y, z);
+  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
+  y = _mm_mul_ps(y, z);
+  y = _mm_mul_ps(y, z);
+  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+  y = _mm_sub_ps(y, tmp);
+  y = _mm_add_ps(y, *(v4sf*)_ps_1);
+  
+  /* Evaluate the second polynom (sincof_* coefficients):
+     y2 = ((p0*z + p1)*z + p2)*z*x + x */
+
+  v4sf y2 = *(v4sf*)_ps_sincof_p0;
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_mul_ps(y2, x);
+  y2 = _mm_add_ps(y2, x);
+
+  /* select the correct result from the two polynoms */  
+  xmm3 = poly_mask;
+  y2 = _mm_and_ps(xmm3, y2); // keep y2 where the mask is set
+  y = _mm_andnot_ps(xmm3, y); /* keep y where the mask is clear */
+  y = _mm_add_ps(y,y2); /* one of the two terms is zero in every lane */
+  /* update the sign */
+  y = _mm_xor_ps(y, sign_bit);
+
+  return y;
+}
+
+/* Cosine computed for 4 floats at once; almost the same as sin_ps.
+
+   x: the four input angles. Returns the cosine of each lane.
+
+   Differences with sin_ps visible below: the sign of the input is
+   dropped and never re-applied, and both the sign flag and the
+   polynomial selection mask are derived from j - 2 instead of j.
+*/
+v4sf cos_ps(v4sf x) { // any x
+  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
+#ifdef USE_SSE2
+  v4si emm0, emm2;
+#else
+  v2si mm0, mm1, mm2, mm3;
+#endif
+  /* take the absolute value */
+  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
+  
+  /* scale by 4/Pi */
+  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
+  
+#ifdef USE_SSE2
+  /* store the integer part of y in emm2 */
+  emm2 = _mm_cvttps_epi32(y);
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
+  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
+  y = _mm_cvtepi32_ps(emm2);
+
+  emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2); /* from here on emm2 holds j - 2 */
+  
+  /* get the swap sign flag: inverted bit 2 of (j - 2), shifted by 29 into the sign position */
+  emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  /* get the polynom selection mask: all-ones where bit 1 of (j - 2) is clear */
+  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+  
+  v4sf sign_bit = _mm_castsi128_ps(emm0);
+  v4sf poly_mask = _mm_castsi128_ps(emm2);
+#else
+  /* store the integer part of y in mm2:mm3 (the upper two lanes go through xmm2) */
+  xmm2 = _mm_movehl_ps(xmm2, y);
+  mm2 = _mm_cvttps_pi32(y);
+  mm3 = _mm_cvttps_pi32(xmm2);
+
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
+  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
+  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
+  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
+
+  y = _mm_cvtpi32x2_ps(mm2, mm3);
+
+
+  mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2); /* from here on mm2:mm3 hold j - 2 */
+  mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);
+
+  /* get the swap sign flag in mm0:mm1 and the 
+     polynom selection mask in mm2:mm3 */
+
+  mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
+  mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
+  mm0 = _mm_slli_pi32(mm0, 29);
+  mm1 = _mm_slli_pi32(mm1, 29);
+
+  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
+  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
+
+  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
+  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
+
+  v4sf sign_bit, poly_mask;
+  COPY_MM_TO_XMM(mm0, mm1, sign_bit);
+  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
+  _mm_empty(); /* good-bye mmx */
+#endif
+  /* The magic pass: "Extended precision modular arithmetic" 
+     x = ((x - y * DP1) - y * DP2) - y * DP3;
+     the constants are stored negated, hence the additions below */
+  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
+  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
+  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
+  xmm1 = _mm_mul_ps(y, xmm1);
+  xmm2 = _mm_mul_ps(y, xmm2);
+  xmm3 = _mm_mul_ps(y, xmm3);
+  x = _mm_add_ps(x, xmm1);
+  x = _mm_add_ps(x, xmm2);
+  x = _mm_add_ps(x, xmm3);
+  
+  /* Evaluate the first polynom (coscof_* coefficients), with z = x*x:
+     y = ((p0*z + p1)*z + p2)*z*z - z/2 + 1 */
+  y = *(v4sf*)_ps_coscof_p0;
+  v4sf z = _mm_mul_ps(x,x);
+
+  y = _mm_mul_ps(y, z);
+  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
+  y = _mm_mul_ps(y, z);
+  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
+  y = _mm_mul_ps(y, z);
+  y = _mm_mul_ps(y, z);
+  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+  y = _mm_sub_ps(y, tmp);
+  y = _mm_add_ps(y, *(v4sf*)_ps_1);
+  
+  /* Evaluate the second polynom (sincof_* coefficients):
+     y2 = ((p0*z + p1)*z + p2)*z*x + x */
+
+  v4sf y2 = *(v4sf*)_ps_sincof_p0;
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_mul_ps(y2, x);
+  y2 = _mm_add_ps(y2, x);
+
+  /* select the correct result from the two polynoms */  
+  xmm3 = poly_mask;
+  y2 = _mm_and_ps(xmm3, y2); // keep y2 where the mask is set
+  y = _mm_andnot_ps(xmm3, y); /* keep y where the mask is clear */
+  y = _mm_add_ps(y,y2); /* one of the two terms is zero in every lane */
+  /* update the sign */
+  y = _mm_xor_ps(y, sign_bit);
+
+  return y;
+}
+
+/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
+   it is almost as fast, and gives you a free cosine with your sine
+
+   x: the four input angles.
+   s: receives the four sines (written once, at the end).
+   c: receives the four cosines (written once, at the end).
+
+   The range reduction and the two polynomials are computed once. The
+   sine takes the polynomial chosen by poly_mask and the cosine takes
+   the other one; each output gets its own sign flag.
+*/
+void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
+  v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
+#ifdef USE_SSE2
+  v4si emm0, emm2, emm4;
+#else
+  v2si mm0, mm1, mm2, mm3, mm4, mm5;
+#endif
+  sign_bit_sin = x;
+  /* take the absolute value */
+  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
+  /* extract the sign bit (upper one) */
+  sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);
+  
+  /* scale by 4/Pi */
+  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
+    
+#ifdef USE_SSE2
+  /* store the integer part of y in emm2 */
+  emm2 = _mm_cvttps_epi32(y);
+
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
+  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
+  y = _mm_cvtepi32_ps(emm2);
+
+  emm4 = emm2; /* copy of j kept for the cosine sign, computed further down */
+
+  /* get the swap sign flag for the sine */
+  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
+  emm0 = _mm_slli_epi32(emm0, 29);
+  v4sf swap_sign_bit_sin = _mm_castsi128_ps(emm0);
+
+  /* get the polynom selection mask for the sine*/
+  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
+  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
+  v4sf poly_mask = _mm_castsi128_ps(emm2);
+#else
+  /* store the integer part of y in mm2:mm3 */
+  xmm3 = _mm_movehl_ps(xmm3, y);
+  mm2 = _mm_cvttps_pi32(y);
+  mm3 = _mm_cvttps_pi32(xmm3);
+
+  /* j=(j+1) & (~1) (see the cephes sources) */
+  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
+  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
+  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
+  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
+
+  y = _mm_cvtpi32x2_ps(mm2, mm3);
+
+  mm4 = mm2; /* copy of j kept for the cosine sign, computed further down */
+  mm5 = mm3;
+
+  /* get the swap sign flag for the sine */
+  mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
+  mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
+  mm0 = _mm_slli_pi32(mm0, 29);
+  mm1 = _mm_slli_pi32(mm1, 29);
+  v4sf swap_sign_bit_sin;
+  COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
+
+  /* get the polynom selection mask for the sine */
+
+  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
+  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
+  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
+  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
+  v4sf poly_mask;
+  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
+#endif
+
+  /* The magic pass: "Extended precision modular arithmetic" 
+     x = ((x - y * DP1) - y * DP2) - y * DP3;
+     the constants are stored negated, hence the additions below */
+  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
+  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
+  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
+  xmm1 = _mm_mul_ps(y, xmm1);
+  xmm2 = _mm_mul_ps(y, xmm2);
+  xmm3 = _mm_mul_ps(y, xmm3);
+  x = _mm_add_ps(x, xmm1);
+  x = _mm_add_ps(x, xmm2);
+  x = _mm_add_ps(x, xmm3);
+
+#ifdef USE_SSE2
+  emm4 = _mm_sub_epi32(emm4, *(v4si*)_pi32_2); /* sign flag for the cosine: inverted bit 2 of (j - 2) */
+  emm4 = _mm_andnot_si128(emm4, *(v4si*)_pi32_4);
+  emm4 = _mm_slli_epi32(emm4, 29);
+  v4sf sign_bit_cos = _mm_castsi128_ps(emm4);
+#else
+  /* get the sign flag for the cosine: inverted bit 2 of (j - 2) */
+  mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
+  mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
+  mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
+  mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
+  mm4 = _mm_slli_pi32(mm4, 29);
+  mm5 = _mm_slli_pi32(mm5, 29);
+  v4sf sign_bit_cos;
+  COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
+  _mm_empty(); /* good-bye mmx */
+#endif
+
+  sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
+
+  
+  /* Evaluate the first polynom (coscof_* coefficients), with z = x*x:
+     y = ((p0*z + p1)*z + p2)*z*z - z/2 + 1 */
+  v4sf z = _mm_mul_ps(x,x);
+  y = *(v4sf*)_ps_coscof_p0;
+
+  y = _mm_mul_ps(y, z);
+  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
+  y = _mm_mul_ps(y, z);
+  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
+  y = _mm_mul_ps(y, z);
+  y = _mm_mul_ps(y, z);
+  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
+  y = _mm_sub_ps(y, tmp);
+  y = _mm_add_ps(y, *(v4sf*)_ps_1);
+  
+  /* Evaluate the second polynom (sincof_* coefficients):
+     y2 = ((p0*z + p1)*z + p2)*z*x + x */
+
+  v4sf y2 = *(v4sf*)_ps_sincof_p0;
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
+  y2 = _mm_mul_ps(y2, z);
+  y2 = _mm_mul_ps(y2, x);
+  y2 = _mm_add_ps(y2, x);
+
+  /* select the correct result from the two polynoms:
+     ysin2/ysin1 are the parts that belong to the sine; subtracting them
+     from y2 and y leaves the complementary parts for the cosine */  
+  xmm3 = poly_mask;
+  v4sf ysin2 = _mm_and_ps(xmm3, y2);
+  v4sf ysin1 = _mm_andnot_ps(xmm3, y);
+  y2 = _mm_sub_ps(y2,ysin2);
+  y = _mm_sub_ps(y, ysin1);
+
+  xmm1 = _mm_add_ps(ysin1,ysin2); /* sine, before the sign is applied */
+  xmm2 = _mm_add_ps(y,y2); /* cosine, before the sign is applied */
+ 
+  /* update the sign */
+  *s = _mm_xor_ps(xmm1, sign_bit_sin);
+  *c = _mm_xor_ps(xmm2, sign_bit_cos);
+}
+
+#endif
+
--- a/src/rubberband-c.cpp
+++ b/src/rubberband-c.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "rubberband/rubberband-c.h"
--- a/src/speex/COPYING
+++ b/src/speex/COPYING
@@ -0,0 +1,35 @@
+Copyright 2002-2007 	Xiph.org Foundation
+Copyright 2002-2007 	Jean-Marc Valin
+Copyright 2005-2007	Analog Devices Inc.
+Copyright 2005-2007	Commonwealth Scientific and Industrial Research 
+                        Organisation (CSIRO)
+Copyright 1993, 2002, 2006 David Rowe
+Copyright 2003 		EpicGames
+Copyright 1992-1994	Jutta Degener, Carsten Bormann
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+- Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+- Neither the name of the Xiph.org Foundation nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/src/speex/resample.c
+++ b/src/speex/resample.c
--- a/src/speex/speex_resampler.h
+++ b/src/speex/speex_resampler.h
@@ -0,0 +1,301 @@
+/* Copyright (C) 2007 Jean-Marc Valin
+      
+   File: speex_resampler.h
+   Resampling code
+      
+   The design goals of this code are:
+      - Very fast algorithm
+      - Low memory requirement
+      - Good *perceptual* quality (and not best SNR)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+   1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   3. The name of the author may not be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+   ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef SPEEX_RESAMPLER_H
+#define SPEEX_RESAMPLER_H
+
+/********* WARNING: MENTAL SANITY ENDS HERE *************/
+
+/* If the resampler is defined outside of Speex, we change the symbol
+   names so that there won't be any clash if linking with Speex later
+   on. */
+
+#define RANDOM_PREFIX rubberband
+
+#ifndef RANDOM_PREFIX
+#error "Please define RANDOM_PREFIX (above) to something specific to your project to prevent symbol name clashes"
+#endif
+
+#define CAT_PREFIX2(a,b) a ## b
+#define CAT_PREFIX(a,b) CAT_PREFIX2(a, b)
+      
+#define speex_resampler_init CAT_PREFIX(RANDOM_PREFIX,_resampler_init)
+#define speex_resampler_init_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_init_frac)
+#define speex_resampler_destroy CAT_PREFIX(RANDOM_PREFIX,_resampler_destroy)
+#define speex_resampler_process_float CAT_PREFIX(RANDOM_PREFIX,_resampler_process_float)
+#define speex_resampler_process_int CAT_PREFIX(RANDOM_PREFIX,_resampler_process_int)
+#define speex_resampler_process_interleaved_float CAT_PREFIX(RANDOM_PREFIX,_resampler_process_interleaved_float)
+#define speex_resampler_process_interleaved_int CAT_PREFIX(RANDOM_PREFIX,_resampler_process_interleaved_int)
+#define speex_resampler_set_rate CAT_PREFIX(RANDOM_PREFIX,_resampler_set_rate)
+#define speex_resampler_get_rate CAT_PREFIX(RANDOM_PREFIX,_resampler_get_rate)
+#define speex_resampler_set_rate_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_set_rate_frac)
+#define speex_resampler_get_ratio CAT_PREFIX(RANDOM_PREFIX,_resampler_get_ratio)
+#define speex_resampler_set_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_set_quality)
+#define speex_resampler_get_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_get_quality)
+#define speex_resampler_set_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_input_stride)
+#define speex_resampler_get_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_stride)
+#define speex_resampler_set_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_output_stride)
+#define speex_resampler_get_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_stride)
+#define speex_resampler_get_input_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_latency)
+#define speex_resampler_get_output_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_latency)
+#define speex_resampler_skip_zeros CAT_PREFIX(RANDOM_PREFIX,_resampler_skip_zeros)
+#define speex_resampler_reset_mem CAT_PREFIX(RANDOM_PREFIX,_resampler_reset_mem)
+#define speex_resampler_strerror CAT_PREFIX(RANDOM_PREFIX,_resampler_strerror)
+
+#define spx_int16_t short
+#define spx_int32_t int
+#define spx_uint16_t unsigned short
+#define spx_uint32_t unsigned int
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SPEEX_RESAMPLER_QUALITY_MAX 10
+#define SPEEX_RESAMPLER_QUALITY_MIN 0
+#define SPEEX_RESAMPLER_QUALITY_DEFAULT 4
+#define SPEEX_RESAMPLER_QUALITY_VOIP 3
+#define SPEEX_RESAMPLER_QUALITY_DESKTOP 5
+
+enum {
+   RESAMPLER_ERR_SUCCESS         = 0,
+   RESAMPLER_ERR_ALLOC_FAILED    = 1,
+   RESAMPLER_ERR_BAD_STATE       = 2,
+   RESAMPLER_ERR_INVALID_ARG     = 3,
+   RESAMPLER_ERR_PTR_OVERLAP     = 4,
+   
+   RESAMPLER_ERR_MAX_ERROR
+};
+
+struct SpeexResamplerState_;
+typedef struct SpeexResamplerState_ SpeexResamplerState;
+
+/** Create a new resampler with integer input and output rates.
+ * @param nb_channels Number of channels to be processed
+ * @param in_rate Input sampling rate (integer number of Hz).
+ * @param out_rate Output sampling rate (integer number of Hz).
+ * @param quality Resampling quality between 0 and 10, where 0 has poor quality
+ * and 10 has very high quality.
+ * @param err If non-NULL, receives a RESAMPLER_ERR_* status code
+ * @return Newly created resampler state
+ * @retval NULL Error: not enough memory
+ */
+SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels, 
+                                          spx_uint32_t in_rate, 
+                                          spx_uint32_t out_rate, 
+                                          int quality,
+                                          int *err);
+
+/** Create a new resampler with fractional input/output rates. The sampling 
+ * rate ratio is an arbitrary rational number with both the numerator and 
+ * denominator being 32-bit integers.
+ * @param nb_channels Number of channels to be processed
+ * @param ratio_num Numerator of the sampling rate ratio
+ * @param ratio_den Denominator of the sampling rate ratio
+ * @param in_rate Input sampling rate rounded to the nearest integer (in Hz).
+ * @param out_rate Output sampling rate rounded to the nearest integer (in Hz).
+ * @param quality Resampling quality between 0 and 10, where 0 has poor quality
+ * and 10 has very high quality.
+ * @param err If non-NULL, receives a RESAMPLER_ERR_* status code
+ * @return Newly created resampler state
+ * @retval NULL Error: not enough memory
+ */
+SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels, 
+                                               spx_uint32_t ratio_num, 
+                                               spx_uint32_t ratio_den, 
+                                               spx_uint32_t in_rate, 
+                                               spx_uint32_t out_rate, 
+                                               int quality,
+                                               int *err);
+
+/** Destroy a resampler state.
+ * @param st Resampler state
+ */
+void speex_resampler_destroy(SpeexResamplerState *st);
+
+/** Resample a float array. The input and output buffers must *not* overlap.
+ * @param st Resampler state
+ * @param channel_index Index of the channel to process in the multi-channel 
+ * case (0 otherwise)
+ * @param in Input buffer
+ * @param in_len Number of input samples in the input buffer. Returns the 
+ * number of samples processed
+ * @param out Output buffer
+ * @param out_len Size of the output buffer. Returns the number of samples written
+ */
+int speex_resampler_process_float(SpeexResamplerState *st, 
+                                   spx_uint32_t channel_index, 
+                                   const float *in, 
+                                   spx_uint32_t *in_len, 
+                                   float *out, 
+                                   spx_uint32_t *out_len);
+
+/** Resample an interleaved float array. The input and output buffers must *not* overlap.
+ * @param st Resampler state
+ * @param in Input buffer
+ * @param in_len Number of input samples in the input buffer. Returns the number
+ * of samples processed. This is all per-channel.
+ * @param out Output buffer
+ * @param out_len Size of the output buffer. Returns the number of samples written.
+ * This is all per-channel.
+ */
+int speex_resampler_process_interleaved_float(SpeexResamplerState *st, 
+                                               const float *in, 
+                                               spx_uint32_t *in_len, 
+                                               float *out, 
+                                               spx_uint32_t *out_len);
+
+/** Set (change) the input/output sampling rates (integer value).
+ * @param st Resampler state
+ * @param in_rate Input sampling rate (integer number of Hz).
+ * @param out_rate Output sampling rate (integer number of Hz).
+ */
+int speex_resampler_set_rate(SpeexResamplerState *st, 
+                              spx_uint32_t in_rate, 
+                              spx_uint32_t out_rate);
+
+/** Get the current input/output sampling rates (integer value).
+ * @param st Resampler state
+ * @param in_rate Input sampling rate (integer number of Hz) copied.
+ * @param out_rate Output sampling rate (integer number of Hz) copied.
+ */
+void speex_resampler_get_rate(SpeexResamplerState *st, 
+                              spx_uint32_t *in_rate, 
+                              spx_uint32_t *out_rate);
+
+/** Set (change) the input/output sampling rates and resampling ratio 
+ * (fractional values in Hz supported).
+ * @param st Resampler state
+ * @param ratio_num Numerator of the sampling rate ratio
+ * @param ratio_den Denominator of the sampling rate ratio
+ * @param in_rate Input sampling rate rounded to the nearest integer (in Hz).
+ * @param out_rate Output sampling rate rounded to the nearest integer (in Hz).
+ */
+int speex_resampler_set_rate_frac(SpeexResamplerState *st, 
+                                   spx_uint32_t ratio_num, 
+                                   spx_uint32_t ratio_den, 
+                                   spx_uint32_t in_rate, 
+                                   spx_uint32_t out_rate);
+
+/** Get the current resampling ratio. The ratio is returned in reduced
+ * form (lowest terms).
+ * @param st Resampler state
+ * @param ratio_num Numerator of the sampling rate ratio copied
+ * @param ratio_den Denominator of the sampling rate ratio copied
+ */
+void speex_resampler_get_ratio(SpeexResamplerState *st, 
+                               spx_uint32_t *ratio_num, 
+                               spx_uint32_t *ratio_den);
+
+/** Set (change) the conversion quality.
+ * @param st Resampler state
+ * @param quality Resampling quality between 0 and 10, where 0 has poor 
+ * quality and 10 has very high quality.
+ */
+int speex_resampler_set_quality(SpeexResamplerState *st, 
+                                 int quality);
+
+/** Get the conversion quality.
+ * @param st Resampler state
+ * @param quality Resampling quality between 0 and 10, where 0 has poor 
+ * quality and 10 has very high quality.
+ */
+void speex_resampler_get_quality(SpeexResamplerState *st, 
+                                 int *quality);
+
+/** Set (change) the input stride.
+ * @param st Resampler state
+ * @param stride Input stride
+ */
+void speex_resampler_set_input_stride(SpeexResamplerState *st, 
+                                      spx_uint32_t stride);
+
+/** Get the input stride.
+ * @param st Resampler state
+ * @param stride Input stride copied
+ */
+void speex_resampler_get_input_stride(SpeexResamplerState *st, 
+                                      spx_uint32_t *stride);
+
+/** Set (change) the output stride.
+ * @param st Resampler state
+ * @param stride Output stride
+ */
+void speex_resampler_set_output_stride(SpeexResamplerState *st, 
+                                      spx_uint32_t stride);
+
+/** Get the output stride.
+ * @param st Resampler state
+ * @param stride Output stride copied
+ */
+void speex_resampler_get_output_stride(SpeexResamplerState *st, 
+                                      spx_uint32_t *stride);
+
+/** Get the latency in input samples introduced by the resampler.
+ * @param st Resampler state
+ */
+int speex_resampler_get_input_latency(SpeexResamplerState *st);
+
+/** Get the latency in output samples introduced by the resampler.
+ * @param st Resampler state
+ */
+int speex_resampler_get_output_latency(SpeexResamplerState *st);
+
+/** Make sure that the first samples to go out of the resamplers don't have 
+ * leading zeros. This is only useful before starting to use a newly created 
+ * resampler. It is recommended to use that when resampling an audio file, as
+ * it will generate a file with the same length. For real-time processing,
+ * it is probably easier not to use this call (so that the output duration
+ * is the same for the first frame).
+ * @param st Resampler state
+ */
+int speex_resampler_skip_zeros(SpeexResamplerState *st);
+
+/** Reset a resampler so a new (unrelated) stream can be processed.
+ * @param st Resampler state
+ */
+int speex_resampler_reset_mem(SpeexResamplerState *st);
+
+/** Returns the English meaning for an error code
+ * @param err Error code
+ * @return English string
+ */
+const char *speex_resampler_strerror(int err);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/src/system/Allocators.cpp
+++ b/src/system/Allocators.cpp
@@ -1,19 +1,31 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "Allocators.h"

+#ifdef HAVE_IPP
+#include <ipps.h>
+#endif

 #include <iostream>
 using std::cerr;
@@ -21,6 +33,37 @@ using std::endl;

 namespace RubberBand {

+#ifdef HAVE_IPP
+
+// Explicit float/double specialisations of allocate() and deallocate(),
+// compiled only when HAVE_IPP is defined. Memory is obtained with
+// ippsMalloc_32f / ippsMalloc_64f and released with ippsFree.
+//
+// Allocation failure follows the same policy as the generic allocate()
+// in Allocators.h: throw std::bad_alloc, or abort() when NO_EXCEPTIONS
+// is defined.
+//
+// NOTE(review): count is a size_t; confirm that the length parameter
+// of ippsMalloc_* cannot be overflowed by very large requests.
+
+/// Allocate count floats through ippsMalloc_32f.
+template <>
+float *allocate(size_t count)
+{
+    float *ptr = ippsMalloc_32f(count);
+    if (!ptr) {
+#ifndef NO_EXCEPTIONS
+        throw (std::bad_alloc());
+#else
+        abort();
+#endif
+    }
+    return ptr;
+}
+
+/// Allocate count doubles through ippsMalloc_64f.
+template <>
+double *allocate(size_t count)
+{
+    double *ptr = ippsMalloc_64f(count);
+    if (!ptr) {
+#ifndef NO_EXCEPTIONS
+        throw (std::bad_alloc());
+#else
+        abort();
+#endif
+    }
+    return ptr;
+}
+
+/// Release a float buffer with ippsFree; a null ptr is ignored.
+template <>
+void deallocate(float *ptr)
+{
+    if (ptr) ippsFree((void *)ptr);
+}
+
+/// Release a double buffer with ippsFree; a null ptr is ignored.
+template <>
+void deallocate(double *ptr)
+{
+    if (ptr) ippsFree((void *)ptr);
+}
+
+#endif

 }

--- a/src/system/Allocators.h
+++ b/src/system/Allocators.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_ALLOCATORS_H_
@@ -34,6 +43,9 @@
 #include <sys/mman.h>
 #endif

+#ifdef LACK_BAD_ALLOC
+namespace std { struct bad_alloc { }; }
+#endif

 namespace RubberBand {

@@ -41,22 +53,55 @@ template <typename T>
 T *allocate(size_t count)
 {
    void *ptr = 0;
+    // 32-byte alignment is required for at least OpenMAX
+    static const int alignment = 32;
+#ifdef USE_OWN_ALIGNED_MALLOC
+    // Alignment must be a power of two, bigger than the pointer
+    // size. Stuff the actual malloc'd pointer in just before the
+    // returned value.  This is the least desirable way to do this --
+    // the other options below are all better
+    size_t allocd = count * sizeof(T) + alignment;
+    void *buf = malloc(allocd);
+    if (buf) {
+        char *adj = (char *)buf;
+        while ((unsigned long long)adj & (alignment-1)) --adj;
+        ptr = ((char *)adj) + alignment;
+        ((void **)ptr)[-1] = buf;
+    }
+#else /* !USE_OWN_ALIGNED_MALLOC */
 #ifdef HAVE_POSIX_MEMALIGN
-    if (posix_memalign(&ptr, 16, count * sizeof(T))) {
+    if (posix_memalign(&ptr, alignment, count * sizeof(T))) {
        ptr = malloc(count * sizeof(T));
    }
-#else 
-    // Note that malloc always aligns to 16 byte boundaries on OS/X,
-    // so we don't need posix_memalign there (which is fortunate,
-    // since it doesn't exist)
+#else /* !HAVE_POSIX_MEMALIGN */
+#ifdef __MSVC__
+    ptr = _aligned_malloc(count * sizeof(T), alignment);
+#else /* !__MSVC__ */
+#warning "No aligned malloc available or defined"
+    // Note that malloc always aligns to 16 byte boundaries on OS/X
    ptr = malloc(count * sizeof(T));
-#endif 
+#endif /* !__MSVC__ */
+#endif /* !HAVE_POSIX_MEMALIGN */
+#endif /* !USE_OWN_ALIGNED_MALLOC */
    if (!ptr) {
+#ifndef NO_EXCEPTIONS
        throw(std::bad_alloc());
+#else
+        abort();
+#endif
    }
    return (T *)ptr;
 }

+#ifdef HAVE_IPP
+
+template <>
+float *allocate(size_t count);
+
+template <>
+double *allocate(size_t count);
+
+#endif
 	
 template <typename T>
 T *allocate_and_zero(size_t count)
@@ -69,9 +114,26 @@ T *allocate_and_zero(size_t count)
 template <typename T>
 void deallocate(T *ptr)
 {
+    // Releases ptr by the route selected at compile time; a null ptr is
+    // ignored in every branch.
+    // NOTE(review): allocate() tests HAVE_POSIX_MEMALIGN before __MSVC__,
+    // whereas this function tests only __MSVC__ -- confirm the two macros
+    // are never defined together.
+#ifdef USE_OWN_ALIGNED_MALLOC
+    // Free the pointer stored in the slot immediately before ptr.
+    if (ptr) free(((void **)ptr)[-1]);
+#else /* !USE_OWN_ALIGNED_MALLOC */
+#ifdef __MSVC__
+    if (ptr) _aligned_free((void *)ptr);
+#else /* !__MSVC__ */
     if (ptr) free((void *)ptr);
+#endif /* !__MSVC__ */
+#endif /* !USE_OWN_ALIGNED_MALLOC */
 }

+#ifdef HAVE_IPP
+
+template <>
+void deallocate(float *);
+
+template <>
+void deallocate(double *);
+
+#endif

 /// Reallocate preserving contents but leaving additional memory uninitialised	
 template <typename T>
@@ -159,6 +221,17 @@ T **reallocate_and_zero_extend_channels(T **ptr,
    return newptr;
 }

+/// RAII class to call deallocate() on destruction.
+///
+/// Holds the pointer passed to the constructor and hands it to
+/// deallocate<T>() when the object is destroyed. Copying is disabled:
+/// a copy would hold the same pointer and deallocate it a second time.
+template <typename T>
+class Deallocator
+{
+public:
+    /// Take charge of t, which is released in the destructor.
+    Deallocator(T *t) : m_t(t) { }
+    ~Deallocator() { deallocate<T>(m_t); }
+private:
+    // Declared but intentionally not defined, to forbid copying
+    Deallocator(const Deallocator &);
+    Deallocator &operator=(const Deallocator &);
+    T *m_t;
+};
+
 }

 #endif
--- a/src/system/Thread.cpp
+++ b/src/system/Thread.cpp
@@ -1,25 +1,37 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

+#ifndef NO_THREADING

 #include "Thread.h"

 #include <iostream>
 #include <cstdlib>

+#ifdef USE_PTHREADS
 #include <sys/time.h>
 #include <time.h>
+#endif

 using std::cerr;
 using std::endl;
@@ -280,6 +292,7 @@ Condition::signal()

 #else /* !_WIN32 */

+#ifdef USE_PTHREADS

 Thread::Thread() :
    m_id(0),
@@ -541,6 +554,93 @@ Condition::signal()
    pthread_cond_signal(&m_condition);
 }

+#else /* !USE_PTHREADS */
+
+// Fallback definitions used when neither _WIN32 nor USE_PTHREADS is
+// defined. Constructors and destructors do nothing,
+// threadingAvailable() returns false, and every other operation calls
+// abort().
+// NOTE(review): Thread.h has an #error for this same configuration
+// ("No thread implementation selected"), so this section looks
+// unreachable as things stand -- confirm.
+// NOTE(review): no Condition::unlock() is defined in this section --
+// confirm whether the class declares one.
+
+Thread::Thread()
+{
+}
+
+Thread::~Thread()
+{
+}
+
+void
+Thread::start()
+{
+    abort();
+}    
+
+void 
+Thread::wait()
+{
+    abort();
+}
+
+// Declared to return an Id but never returns: abort() is the only
+// statement.
+Thread::Id
+Thread::id()
+{
+    abort();
+}
+
+bool
+Thread::threadingAvailable()
+{
+    return false;
+}
+
+Mutex::Mutex()
+{
+}
+
+Mutex::~Mutex()
+{
+}
+
+void
+Mutex::lock()
+{
+    abort();
+}
+
+void
+Mutex::unlock()
+{
+    abort();
+}
+
+// Declared to return bool but never returns: abort() is the only
+// statement.
+bool
+Mutex::trylock()
+{
+    abort();
+}
+
+// The name argument is ignored.
+Condition::Condition(const char *)
+{
+}
+
+Condition::~Condition()
+{
+}
+
+void
+Condition::lock()
+{
+    abort();
+}
+
+void 
+Condition::wait(int us)
+{
+    abort();
+}
+
+void
+Condition::signal()
+{
+    abort();
+}
+
+#endif /* !USE_PTHREADS */
 #endif /* !_WIN32 */

 MutexLocker::MutexLocker(Mutex *mutex) :
@@ -560,3 +660,4 @@ MutexLocker::~MutexLocker()

 }

+#endif
--- a/src/system/Thread.h
+++ b/src/system/Thread.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_THREAD_H_
@@ -17,11 +26,16 @@

 #include <string>

+#ifndef NO_THREADING

 #ifdef _WIN32
 #include <windows.h>
 #else /* !_WIN32 */
+#ifdef USE_PTHREADS
 #include <pthread.h>
+#else /* !USE_PTHREADS */
+#error No thread implementation selected
+#endif /* !USE_PTHREADS */
 #endif /* !_WIN32 */

 //#define DEBUG_THREAD 1
@@ -37,7 +51,9 @@ public:
 #ifdef _WIN32
    typedef HANDLE Id;
 #else
+#ifdef USE_PTHREADS
    typedef pthread_t Id;
+#endif
 #endif

    Thread();
@@ -59,10 +75,12 @@ private:
    bool m_extant;
    static DWORD WINAPI staticRun(LPVOID lpParam);
 #else
+#ifdef USE_PTHREADS
    pthread_t m_id;
    bool m_extant;
    static void *staticRun(void *);
 #endif
+#endif
 };

 class Mutex
@@ -82,12 +100,14 @@ private:
    DWORD m_lockedBy;
 #endif
 #else
+#ifdef USE_PTHREADS
    pthread_mutex_t m_mutex;
 #ifndef NO_THREAD_CHECKS
    pthread_t m_lockedBy;
    bool m_locked;
 #endif
 #endif
+#endif
 };

 class MutexLocker
@@ -133,10 +153,12 @@ private:
    HANDLE m_condition;
    bool m_locked;
 #else
+#ifdef USE_PTHREADS
    pthread_mutex_t m_mutex;
    pthread_cond_t m_condition;
    bool m_locked;
 #endif
+#endif
 #ifdef DEBUG_CONDITION
    std::string m_name;
 #endif
@@ -144,5 +166,67 @@ private:

 }

+#else
+
+/* Stub threading interface. We do not have threading support in this code. */
+
+namespace RubberBand
+{
+
+/// Stub Thread for NO_THREADING builds: start() and wait() do nothing,
+/// id() returns 0 and threadingAvailable() returns false. Nothing in
+/// this class calls run().
+class Thread
+{
+public:
+    typedef unsigned int Id;
+
+    Thread() { }
+    virtual ~Thread() { }
+
+    Id id() { return 0; }
+
+    void start() { } 
+    void wait() { }
+
+    static bool threadingAvailable() { return false; }
+
+protected:
+    virtual void run() = 0;
+
+private:
+};
+
+/// Stub Mutex: lock() and unlock() are no-ops.
+class Mutex
+{
+public:
+    Mutex() { }
+    ~Mutex() { }
+
+    void lock() { }
+    void unlock() { }
+    // NOTE(review): returns false although lock() here cannot fail --
+    // confirm callers do not read false as "lock not acquired".
+    bool trylock() { return false; }
+};
+
+/// Stub MutexLocker: ignores the mutex it is given.
+class MutexLocker
+{
+public:
+    MutexLocker(Mutex *) { }
+    ~MutexLocker() { }
+};
+
+/// Stub Condition: every operation is a no-op and the name is unused.
+class Condition
+{
+public:
+    Condition(std::string name) { }
+    ~Condition() { }
+    
+    void lock() { }
+    void unlock() { }
+    void wait(int us = 0) { }
+
+    void signal() { }
+};
+
+}
+
+#endif /* NO_THREADING */

 #endif
--- a/src/system/VectorOps.h
+++ b/src/system/VectorOps.h
@@ -1,21 +1,41 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_VECTOR_OPS_H_
 #define _RUBBERBAND_VECTOR_OPS_H_

+#ifdef HAVE_IPP
+#ifndef _MSC_VER
+#include <inttypes.h>
+#endif
+#include <ipps.h>
+#include <ippac.h>
+#endif

+#ifdef HAVE_VDSP
+#include <vecLib/vDSP.h>
+#include <vecLib/vForce.h>
+#endif

 #include <cstring>
 #include "sysutils.h"
@@ -40,6 +60,33 @@ inline void v_zero(T *const R__ ptr,
    }
 }

+#if defined HAVE_IPP
+// IPP build: float and double v_zero are specialised to call
+// ippsZero_32f / ippsZero_64f instead of the generic template.
+template<> 
+inline void v_zero(float *const R__ ptr, 
+                   const int count)
+{
+    ippsZero_32f(ptr, count);
+}
+template<> 
+inline void v_zero(double *const R__ ptr,
+                   const int count)
+{
+    ippsZero_64f(ptr, count);
+}
+#elif defined HAVE_VDSP
+// vDSP build (only when HAVE_IPP is not defined): the same two
+// specialisations call vDSP_vclr / vDSP_vclrD, passing 1 as the second
+// argument (presumably the element stride).
+template<> 
+inline void v_zero(float *const R__ ptr, 
+                   const int count)
+{
+    vDSP_vclr(ptr, 1, count);
+}
+template<> 
+inline void v_zero(double *const R__ ptr,
+                   const int count)
+{
+    vDSP_vclrD(ptr, 1, count);
+}
+#endif

 template<typename T>
 inline void v_zero_channels(T *const R__ *const R__ ptr,
@@ -71,6 +118,22 @@ inline void v_copy(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP build: float and double v_copy are specialised to call
+// ippsCopy_32f / ippsCopy_64f. Note the argument order: the IPP calls
+// take src first and dst second, the reverse of v_copy itself.
+template<>
+inline void v_copy(float *const R__ dst,
+                   const float *const R__ src,
+                   const int count)
+{
+    ippsCopy_32f(src, dst, count);
+}
+template<>
+inline void v_copy(double *const R__ dst,
+                   const double *const R__ src,
+                   const int count)
+{
+    ippsCopy_64f(src, dst, count);
+}
+#endif

 template<typename T>
 inline void v_copy_channels(T *const R__ *const R__ dst,
@@ -92,6 +155,22 @@ inline void v_move(T *const dst,
    memmove(dst, src, count * sizeof(T));
 }

+#if defined HAVE_IPP
+// IPP build: float and double v_move are specialised to call
+// ippsMove_32f / ippsMove_64f (src first, dst second) in place of the
+// generic memmove-based version. As in the generic template, dst and
+// src are not R__-qualified here.
+template<>
+inline void v_move(float *const dst,
+                   const float *const src,
+                   const int count)
+{
+    ippsMove_32f(src, dst, count);
+}
+template<>
+inline void v_move(double *const dst,
+                   const double *const src,
+                   const int count)
+{
+    ippsMove_64f(src, dst, count);
+}
+#endif

 template<typename T, typename U>
 inline void v_convert(U *const R__ dst,
@@ -118,6 +197,37 @@ inline void v_convert(double *const R__ dst,
    v_copy(dst, src, count);
 }

+#if defined HAVE_IPP
+// IPP build: float->double and double->float v_convert are
+// specialised to call ippsConvert_32f64f / ippsConvert_64f32f
+// (src first, dst second).
+template<>
+inline void v_convert(double *const R__ dst,
+                      const float *const R__ src,
+                      const int count)
+{
+    ippsConvert_32f64f(src, dst, count);
+}
+template<>
+inline void v_convert(float *const R__ dst,
+                      const double *const R__ src,
+                      const int count)
+{
+    ippsConvert_64f32f(src, dst, count);
+}
+#elif defined HAVE_VDSP
+// vDSP build (only when HAVE_IPP is not defined): the same conversions
+// go through vDSP_vspdp / vDSP_vdpsp. The casts drop the const
+// qualifier from src before it is passed to the vDSP call.
+template<>
+inline void v_convert(double *const R__ dst,
+                      const float *const R__ src,
+                      const int count)
+{
+    vDSP_vspdp((float *)src, 1, dst, 1, count);
+}
+template<>
+inline void v_convert(float *const R__ dst,
+                      const double *const R__ src,
+                      const int count)
+{
+    vDSP_vdpsp((double *)src, 1, dst, 1, count);
+}
+#endif

 template<typename T, typename U>
 inline void v_convert_channels(U *const R__ *const R__ dst,
@@ -150,6 +260,21 @@ inline void v_add(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP build: float and double forms of v_add(dst, src, count) call
+// ippsAdd_32f_I / ippsAdd_64f_I (src first, dst second). Both are
+// explicit specialisations of the generic template, so a call that
+// names the type explicitly (v_add<double>) also reaches the IPP
+// version.
+template<>
+inline void v_add(float *const R__ dst,
+                  const float *const R__ src,
+                  const int count)
+{
+    ippsAdd_32f_I(src, dst, count);
+}
+template<>
+inline void v_add(double *const R__ dst,
+                  const double *const R__ src,
+                  const int count)
+{
+    ippsAdd_64f_I(src, dst, count);
+}
+#endif

 template<typename T>
 inline void v_add_channels(T *const R__ *const R__ dst,
@@ -194,6 +319,21 @@ inline void v_subtract(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP build: float and double forms of v_subtract(dst, src, count)
+// call ippsSub_32f_I / ippsSub_64f_I (src first, dst second). Both are
+// explicit specialisations of the generic template, so a call that
+// names the type explicitly (v_subtract<double>) also reaches the IPP
+// version.
+template<>
+inline void v_subtract(float *const R__ dst,
+                       const float *const R__ src,
+                       const int count)
+{
+    ippsSub_32f_I(src, dst, count);
+}
+template<>
+inline void v_subtract(double *const R__ dst,
+                       const double *const R__ src,
+                       const int count)
+{
+    ippsSub_64f_I(src, dst, count);
+}
+#endif

 template<typename T, typename G>
 inline void v_scale(T *const R__ dst,
@@ -205,6 +345,22 @@ inline void v_scale(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP 
+// IPP build: v_scale with a float buffer and float gain, or a double
+// buffer and double gain, is specialised to call ippsMulC_32f_I /
+// ippsMulC_64f_I (gain first, then dst).
+template<>
+inline void v_scale(float *const R__ dst,
+                    const float gain,
+                    const int count)
+{
+    ippsMulC_32f_I(gain, dst, count);
+}
+template<>
+inline void v_scale(double *const R__ dst,
+                    const double gain,
+                    const int count)
+{
+    ippsMulC_64f_I(gain, dst, count);
+}
+#endif

 template<typename T>
 inline void v_multiply(T *const R__ dst,
@@ -216,6 +372,22 @@ inline void v_multiply(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP 
+// IPP build: the two-pointer v_multiply(dst, src, count) is
+// specialised for float and double to call ippsMul_32f_I /
+// ippsMul_64f_I (src first, dst second).
+template<>
+inline void v_multiply(float *const R__ dst,
+                       const float *const R__ src,
+                       const int count)
+{
+    ippsMul_32f_I(src, dst, count);
+}
+template<>
+inline void v_multiply(double *const R__ dst,
+                       const double *const R__ src,
+                       const int count)
+{
+    ippsMul_64f_I(src, dst, count);
+}
+#endif

 template<typename T>
 inline void v_multiply(T *const R__ dst,
@@ -238,7 +410,41 @@ inline void v_divide(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP 
+// IPP build: v_divide(dst, src, count) is specialised for float and
+// double to call ippsDiv_32f_I / ippsDiv_64f_I (src first, dst
+// second).
+template<>
+inline void v_divide(float *const R__ dst,
+                     const float *const R__ src,
+                     const int count)
+{
+    ippsDiv_32f_I(src, dst, count);
+}
+template<>
+inline void v_divide(double *const R__ dst,
+                     const double *const R__ src,
+                     const int count)
+{
+    ippsDiv_64f_I(src, dst, count);
+}
+#endif

+#if defined HAVE_IPP 
+// IPP build: the three-pointer v_multiply(dst, src1, src2, count) is
+// specialised for float and double to call ippsMul_32f / ippsMul_64f,
+// passing the two sources first and dst third.
+template<>
+inline void v_multiply(float *const R__ dst,
+                       const float *const R__ src1,
+                       const float *const R__ src2,
+                       const int count)
+{
+    ippsMul_32f(src1, src2, dst, count);
+}    
+template<>
+inline void v_multiply(double *const R__ dst,
+                       const double *const R__ src1,
+                       const double *const R__ src2,
+                       const int count)
+{
+    ippsMul_64f(src1, src2, dst, count);
+}
+#endif

 template<typename T>
 inline void v_multiply_and_add(T *const R__ dst,
@@ -251,6 +457,24 @@ inline void v_multiply_and_add(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP // IPP builds: v_multiply_and_add via ippsAddProduct
+template<>
+inline void v_multiply_and_add(float *const R__ dst,
+                               const float *const R__ src1,
+                               const float *const R__ src2,
+                               const int count)
+{
+    ippsAddProduct_32f(src1, src2, dst, count); // dst is the accumulator, passed third
+}
+template<>
+inline void v_multiply_and_add(double *const R__ dst,
+                               const double *const R__ src1,
+                               const double *const R__ src2,
+                               const int count)
+{
+    ippsAddProduct_64f(src1, src2, dst, count); // same call shape as the float version
+}
+#endif

 template<typename T>
 inline T v_sum(const T *const R__ src,
@@ -272,6 +496,41 @@ inline void v_log(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP // IPP builds: v_log handled by ippsLn on the single in/out vector
+template<>
+inline void v_log(float *const R__ dst,
+                  const int count)
+{
+    ippsLn_32f_I(dst, count);
+}
+template<>
+inline void v_log(double *const R__ dst,
+                  const int count)
+{
+    ippsLn_64f_I(dst, count);
+}
+#elif defined HAVE_VDSP // vecLib builds: vForce call routed through a temporary buffer
+// vForce provides no in-place variants of these functions. Open
+// questions: is it safe to call the out-of-place functions with the
+// same vector as both input and output? And is an out-of-place call
+// into a temporary buffer still faster than any other approach?
+template<>
+inline void v_log(float *const R__ dst,
+                  const int count)
+{
+    float tmp[count]; // stack VLA (a compiler extension in C++); its size grows with count
+    vvlogf(tmp, dst, &count); // tmp receives the results, dst is the input
+    v_copy(dst, tmp, count); // copy the results back over the input
+}
+template<>
+inline void v_log(double *const R__ dst,
+                  const int count)
+{
+    double tmp[count]; // stack VLA, as in the float version
+    vvlog(tmp, dst, &count); // tmp receives the results, dst is the input
+    v_copy(dst, tmp, count); // copy the results back over the input
+}
+#endif

 template<typename T>
 inline void v_exp(T *const R__ dst,
@@ -282,6 +541,41 @@ inline void v_exp(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP // IPP builds: v_exp handled by ippsExp on the single in/out vector
+template<>
+inline void v_exp(float *const R__ dst,
+                  const int count)
+{
+    ippsExp_32f_I(dst, count);
+}
+template<>
+inline void v_exp(double *const R__ dst,
+                  const int count)
+{
+    ippsExp_64f_I(dst, count);
+}
+#elif defined HAVE_VDSP // vecLib builds: vForce call routed through a temporary buffer
+// vForce provides no in-place variants of these functions. Open
+// questions: is it safe to call the out-of-place functions with the
+// same vector as both input and output? And is an out-of-place call
+// into a temporary buffer still faster than any other approach?
+template<>
+inline void v_exp(float *const R__ dst,
+                  const int count)
+{
+    float tmp[count]; // stack VLA (a compiler extension in C++); its size grows with count
+    vvexpf(tmp, dst, &count); // tmp receives the results, dst is the input
+    v_copy(dst, tmp, count); // copy the results back over the input
+}
+template<>
+inline void v_exp(double *const R__ dst,
+                  const int count)
+{
+    double tmp[count]; // stack VLA, as in the float version
+    vvexp(tmp, dst, &count); // tmp receives the results, dst is the input
+    v_copy(dst, tmp, count); // copy the results back over the input
+}
+#endif

 template<typename T>
 inline void v_sqrt(T *const R__ dst,
@@ -292,6 +586,41 @@ inline void v_sqrt(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP // IPP builds: v_sqrt handled by ippsSqrt on the single in/out vector
+template<>
+inline void v_sqrt(float *const R__ dst,
+                   const int count)
+{
+    ippsSqrt_32f_I(dst, count);
+}
+template<>
+inline void v_sqrt(double *const R__ dst,
+                   const int count)
+{
+    ippsSqrt_64f_I(dst, count);
+}
+#elif defined HAVE_VDSP // vecLib builds: vForce call routed through a temporary buffer
+// vForce provides no in-place variants of these functions. Open
+// questions: is it safe to call the out-of-place functions with the
+// same vector as both input and output? And is an out-of-place call
+// into a temporary buffer still faster than any other approach?
+template<>
+inline void v_sqrt(float *const R__ dst,
+                   const int count)
+{
+    float tmp[count]; // stack VLA (a compiler extension in C++); its size grows with count
+    vvsqrtf(tmp, dst, &count); // tmp receives the results, dst is the input
+    v_copy(dst, tmp, count); // copy the results back over the input
+}
+template<>
+inline void v_sqrt(double *const R__ dst,
+                   const int count)
+{
+    double tmp[count]; // stack VLA, as in the float version
+    vvsqrt(tmp, dst, &count); // tmp receives the results, dst is the input
+    v_copy(dst, tmp, count); // copy the results back over the input
+}
+#endif

 template<typename T>
 inline void v_square(T *const R__ dst,
@@ -302,6 +631,20 @@ inline void v_square(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP // IPP builds: v_square handled by ippsSqr on the single in/out vector
+template<>
+inline void v_square(float *const R__ dst,
+                   const int count)
+{
+    ippsSqr_32f_I(dst, count);
+}
+template<>
+inline void v_square(double *const R__ dst,
+                   const int count)
+{
+    ippsSqr_64f_I(dst, count);
+}
+#endif

 template<typename T>
 inline void v_abs(T *const R__ dst,
@@ -312,6 +655,29 @@ inline void v_abs(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP // IPP builds: v_abs handled by ippsAbs on the single in/out vector
+template<>
+inline void v_abs(float *const R__ dst,
+                  const int count)
+{
+    ippsAbs_32f_I(dst, count);
+}
+template<>
+inline void v_abs(double *const R__ dst,
+                  const int count)
+{
+    ippsAbs_64f_I(dst, count);
+}
+#elif defined HAVE_VDSP // vecLib builds: float only; no double specialisation in this branch
+template<>
+inline void v_abs(float *const R__ dst,
+                  const int count)
+{
+    float tmp[count]; // stack VLA (a compiler extension in C++); its size grows with count
+    vvfabf(tmp, dst, &count); // tmp receives the results, dst is the input
+    v_copy(dst, tmp, count); // copy the results back over the input
+}
+#endif

 template<typename T>
 inline void v_interleave(T *const R__ dst,
@@ -341,6 +707,17 @@ inline void v_interleave(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP // IPP builds: float v_interleave via ippsInterleave_32f
+template<>
+inline void v_interleave(float *const R__ dst,
+                         const float *const R__ *const R__ src,
+                         const int channels, 
+                         const int count)
+{
+    ippsInterleave_32f((const Ipp32f **)src, channels, count, dst); // C-style cast adapts the qualified pointer-to-pointer to IPP's parameter type
+}
+// IPP does not (currently?) provide double-precision interleave
+#endif

 template<typename T>
 inline void v_deinterleave(T *const R__ *const R__ dst,
@@ -370,6 +747,17 @@ inline void v_deinterleave(T *const R__ *const R__ dst,
    }
 }

+#if defined HAVE_IPP // IPP builds: float v_deinterleave via ippsDeinterleave_32f
+template<>
+inline void v_deinterleave(float *const R__ *const R__ dst,
+                           const float *const R__ src,
+                           const int channels, 
+                           const int count)
+{
+    ippsDeinterleave_32f((const Ipp32f *)src, channels, count, (Ipp32f **)dst); // casts adapt the qualified pointers to IPP's parameter types
+}
+// IPP does not (currently?) provide double-precision deinterleave
+#endif

 template<typename T>
 inline void v_fftshift(T *const R__ ptr,
--- a/src/system/VectorOpsComplex.cpp
+++ b/src/system/VectorOpsComplex.cpp
@@ -1,24 +1,198 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "VectorOpsComplex.h"

 #include "system/sysutils.h"

+#include <cassert>
+
+#if defined USE_POMMIER_MATHFUN
+#if defined __ARMEL__
+#include "pommier/neon_mathfun.h"
+#else
+#include "pommier/sse_mathfun.h"
+#endif
+#endif

 namespace RubberBand {

+#ifdef USE_APPROXIMATE_ATAN2
+// Cheap approximation of the phase angle atan2(imag, real), in radians.
+// NB the arguments are (real, imag): the opposite order from atan2f(y, x).
+float approximate_atan2f(float real, float imag)
+{
+    static const float pi = M_PI;
+    static const float pi2 = M_PI / 2;
+
+    float atan;
+
+    if (real == 0.f) {
+
+        // On the imaginary axis imag/real is undefined, so choose the
+        // result from the sign of imag alone
+        if (imag > 0.0f) atan = pi2;
+        else if (imag == 0.0f) atan = 0.0f;
+        else atan = -pi2;
+
+    } else {
+
+        float z = imag/real;
+
+        if (fabsf(z) < 1.f) {
+            // |z| < 1: rational approximation of atan(z), then move
+            // the result into the correct quadrant when real < 0
+            atan = z / (1.f + 0.28f * z * z);
+            if (real < 0.f) {
+                if (imag < 0.f) atan -= pi;
+                else atan += pi;
+            }
+        } else {
+            // |z| >= 1: approximate via the reciprocal form around pi/2
+            atan = pi2 - z / (z * z + 0.28f);
+            if (imag < 0.f) atan -= pi;
+        }
+    }
+
+    // Without this return the function has undefined behaviour and
+    // c_magphase would receive an indeterminate phase
+    return atan;
+}
+#endif
+
+#if defined USE_POMMIER_MATHFUN
+
+#ifdef __ARMEL__ // ARM build: same union, declared without the ALIGN16 macros
+typedef union {
+  float f[4]; // the four lanes viewed as individual floats
+  int i[4]; // the same storage viewed as four ints
+  v4sf  v; // the vector value handed to the pommier math functions
+} V4SF;
+#else
+typedef ALIGN16_BEG union {
+  float f[4]; // the four lanes viewed as individual floats
+  int i[4]; // the same storage viewed as four ints
+  v4sf  v; // the vector value handed to the pommier math functions
+} ALIGN16_END V4SF;
+#endif
+
+// Convert count polar values (mag[i], phase[i]) into separate real
+// and imag arrays. Full groups of four are converted with the
+// vectorised sincos_ps; any remaining elements go through c_phasor.
+void
+v_polar_to_cartesian_pommier(float *const R__ real,
+                             float *const R__ imag,
+                             const float *const R__ mag,
+                             const float *const R__ phase,
+                             const int count)
+{
+    // i must be shared between the vector loop and the scalar tail:
+    // redeclaring it in the for statement made the tail restart at 0
+    int i;
+    int idx = 0, tidx = 0;
+
+    for (i = 0; i + 4 <= count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        // Load all four lanes, so that idx advances in step with i
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = mag[idx];
+            fphase.f[j] = phase[idx++];
+        }
+
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            real[tidx] = fre.f[j] * fmag.f[j];
+            imag[tidx++] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    // Scalar tail for the final count % 4 elements; here tidx == i
+    while (i < count) {
+        float re, im;
+        c_phasor(&re, &im, phase[i]);
+        real[tidx] = re * mag[i];
+        imag[tidx++] = im * mag[i];
+        ++i;
+    }
+}
+
+// Convert count interleaved (mag, phase) pairs in srcdst into
+// interleaved (real, imag) pairs, in place. srcdst is indexed up to
+// 2 * count - 1. Full groups of four pairs use the vectorised
+// sincos_ps; any remaining pairs go through c_phasor.
+void
+v_polar_interleaved_to_cartesian_inplace_pommier(float *const R__ srcdst,
+                                                 const int count)
+{
+    int i;
+    int idx = 0, tidx = 0;
+
+    for (i = 0; i + 4 <= count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        // Read all four pairs before writing any of them back: the
+        // read and write positions cover the same eight floats
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = srcdst[idx++];
+            fphase.f[j] = srcdst[idx++];
+        }
+
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            srcdst[tidx++] = fre.f[j] * fmag.f[j];
+            srcdst[tidx++] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    // Scalar tail for the final count % 4 pairs; idx == tidx == 2 * i
+    while (i < count) {
+        float real, imag;
+        float mag = srcdst[idx++];
+        float phase = srcdst[idx++];
+        c_phasor(&real, &imag, phase);
+        srcdst[tidx++] = real * mag;
+        srcdst[tidx++] = imag * mag;
+        ++i;
+    }
+}
+
+// Convert count polar values (mag[i], phase[i]) into interleaved
+// (real, imag) pairs in dst, which is indexed up to 2 * count - 1.
+// Full groups of four use the vectorised sincos_ps; any remaining
+// elements go through c_phasor.
+void
+v_polar_to_cartesian_interleaved_pommier(float *const R__ dst,
+                                         const float *const R__ mag,
+                                         const float *const R__ phase,
+                                         const int count)
+{
+    int i;
+    int idx = 0, tidx = 0;
+
+    for (i = 0; i + 4 <= count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        // Load all four lanes, so that idx advances in step with i
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = mag[idx];
+            fphase.f[j] = phase[idx];
+            ++idx;
+        }
+
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            dst[tidx++] = fre.f[j] * fmag.f[j];
+            dst[tidx++] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    // Scalar tail for the final count % 4 elements; here tidx == 2 * i
+    while (i < count) {
+        float real, imag;
+        c_phasor(&real, &imag, phase[i]);
+        dst[tidx++] = real * mag[i];
+        dst[tidx++] = imag * mag[i];
+        ++i;
+    }
+}
+
+#endif


 }
--- a/src/system/VectorOpsComplex.h
+++ b/src/system/VectorOpsComplex.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_VECTOR_OPS_COMPLEX_H_
@@ -26,7 +35,22 @@ inline void c_phasor(T *real, T *imag, T phase)
 {
    //!!! IPP contains ippsSinCos_xxx in ippvm.h -- these are
    //!!! fixed-accuracy, test and compare
-#if defined __GNUC__
+#if defined HAVE_VDSP
+    int one = 1;
+    if (sizeof(T) == sizeof(float)) {
+        vvsincosf((float *)imag, (float *)real, (const float *)&phase, &one);
+    } else {
+        vvsincos((double *)imag, (double *)real, (const double *)&phase, &one);
+    }
+#elif defined LACK_SINCOS
+    if (sizeof(T) == sizeof(float)) {
+        *real = cosf(phase);
+        *imag = sinf(phase);
+    } else {
+        *real = cos(phase);
+        *imag = sin(phase);
+    }
+#elif defined __GNUC__
    if (sizeof(T) == sizeof(float)) {
        sincosf(phase, (float *)imag, (float *)real);
    } else {
@@ -50,23 +74,35 @@ inline void c_magphase(T *mag, T *phase, T real, T imag)
    *phase = atan2(imag, real);
 }

+#ifdef USE_APPROXIMATE_ATAN2 // float c_magphase: approximate phase here, exact atan2f in the #else branch
+// NB arguments in opposite order from usual for atan2f
+extern float approximate_atan2f(float real, float imag);
+template<>
+inline void c_magphase(float *mag, float *phase, float real, float imag)
+{
+    float atan = approximate_atan2f(real, imag); // (real, imag) order, see the note above the declaration
+    *phase = atan;
+    *mag = sqrtf(real * real + imag * imag);
+}
+#else
 template<>
 inline void c_magphase(float *mag, float *phase, float real, float imag)
 {
    *mag = sqrtf(real * real + imag * imag);
    *phase = atan2f(imag, real);
 }
+#endif


-template<typename T>
+template<typename S, typename T> // S source, T target
 void v_polar_to_cartesian(T *const R__ real,
                          T *const R__ imag,
-                          T *const R__ mag,
-                          T *const R__ phase,
+                          const S *const R__ mag,
+                          const S *const R__ phase,
                          const int count)
 {
    for (int i = 0; i < count; ++i) {
-        c_phasor(real + i, imag + i, phase[i]);
+        c_phasor<T>(real + i, imag + i, phase[i]);
    }
    v_multiply(real, mag, count);
    v_multiply(imag, mag, count);
@@ -86,29 +122,117 @@ void v_polar_interleaved_to_cartesian_inplace(T *const R__ srcdst,
    }
 }

-template<typename T>
+template<typename S, typename T> // S source, T target
+void v_polar_to_cartesian_interleaved(T *const R__ dst,
+                                      const S *const R__ mag,
+                                      const S *const R__ phase,
+                                      const int count)
+{
+    // Walk the output as (real, imag) pairs, one pair per input element
+    T *out = dst;
+    for (int n = 0; n < count; ++n) {
+        T re, im;
+        c_phasor<T>(&re, &im, phase[n]);
+        re *= mag[n];
+        im *= mag[n];
+        *out++ = re;
+        *out++ = im;
+    }
+}
+
+#if defined USE_POMMIER_MATHFUN // route the all-float conversions to the *_pommier functions
+void v_polar_to_cartesian_pommier(float *const R__ real,
+                                  float *const R__ imag,
+                                  const float *const R__ mag,
+                                  const float *const R__ phase,
+                                  const int count);
+void v_polar_interleaved_to_cartesian_inplace_pommier(float *const R__ srcdst,
+                                                      const int count);
+void v_polar_to_cartesian_interleaved_pommier(float *const R__ dst,
+                                              const float *const R__ mag,
+                                              const float *const R__ phase,
+                                              const int count);
+
+template<> // float source and float target only
+inline void v_polar_to_cartesian(float *const R__ real,
+                                 float *const R__ imag,
+                                 const float *const R__ mag,
+                                 const float *const R__ phase,
+                                 const int count)
+{
+    v_polar_to_cartesian_pommier(real, imag, mag, phase, count);
+}
+
+template<> // float only
+inline void v_polar_interleaved_to_cartesian_inplace(float *const R__ srcdst,
+                                                     const int count)
+{
+    v_polar_interleaved_to_cartesian_inplace_pommier(srcdst, count);
+}
+
+template<> // float source and float target only
+inline void v_polar_to_cartesian_interleaved(float *const R__ dst,
+                                             const float *const R__ mag,
+                                             const float *const R__ phase,
+                                             const int count)
+{
+    v_polar_to_cartesian_interleaved_pommier(dst, mag, phase, count);
+}
+
+#endif
+
+template<typename S, typename T> // S source, T target
 void v_cartesian_to_polar(T *const R__ mag,
                           T *const R__ phase,
-                          T *const R__ real,
-                          T *const R__ imag,
+                          const S *const R__ real,
+                          const S *const R__ imag,
                           const int count)
 {
    for (int i = 0; i < count; ++i) {
-        c_magphase(mag + i, phase + i, real[i], imag[i]);
+        c_magphase<T>(mag + i, phase + i, real[i], imag[i]); // explicit <T>: the S inputs convert to T instead of breaking deduction
    }
 }

-template<typename T>
+template<typename S, typename T> // S source, T target
 void v_cartesian_interleaved_to_polar(T *const R__ mag,
                                       T *const R__ phase,
-                                      const T *const R__ src,
+                                      const S *const R__ src,
                                       const int count)
 {
    for (int i = 0; i < count; ++i) {
-        c_magphase(mag + i, phase + i, src[i*2], src[i*2+1]);
+        c_magphase<T>(mag + i, phase + i, src[i*2], src[i*2+1]); // src holds (real, imag) pairs; explicit <T> converts the S inputs
    }
 }

+#ifdef HAVE_VDSP // vecLib builds: same-type float and double conversions via vDSP/vForce
+template<>
+inline void v_cartesian_to_polar(float *const R__ mag,
+                                 float *const R__ phase,
+                                 const float *const R__ real,
+                                 const float *const R__ imag,
+                                 const int count)
+{
+    DSPSplitComplex c;
+    c.realp = const_cast<float *>(real); // the struct fields are non-const pointers, hence the casts
+    c.imagp = const_cast<float *>(imag);
+    vDSP_zvmags(&c, 1, phase, 1, count); // using phase as a temporary dest
+    vvsqrtf(mag, phase, &count); // using phase as the source
+    vvatan2f(phase, imag, real, &count); // phase is overwritten last, with the final result
+}
+template<>
+inline void v_cartesian_to_polar(double *const R__ mag,
+                                 double *const R__ phase,
+                                 const double *const R__ real,
+                                 const double *const R__ imag,
+                                 const int count)
+{
+    // double precision, this is significantly faster than using vDSP_polar
+    DSPDoubleSplitComplex c;
+    c.realp = const_cast<double *>(real); // the struct fields are non-const pointers, hence the casts
+    c.imagp = const_cast<double *>(imag);
+    vDSP_zvmagsD(&c, 1, phase, 1, count); // using phase as a temporary dest
+    vvsqrt(mag, phase, &count); // using phase as the source
+    vvatan2(phase, imag, real, &count); // phase is overwritten last, with the final result
+}
+#endif

 template<typename T>
 void v_cartesian_to_polar_interleaved_inplace(T *const R__ srcdst,
--- a/src/system/sysutils.cpp
+++ b/src/system/sysutils.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "sysutils.h"
@@ -38,7 +47,14 @@
 #include <cstdlib>
 #include <iostream>

+#ifdef HAVE_IPP
+#include <ipp.h> // for static init
+#endif

+#ifdef HAVE_VDSP
+#include <vecLib/vDSP.h>
+#include <fenv.h>
+#endif

 #ifdef _WIN32
 #include <fstream>
@@ -55,17 +71,17 @@ system_get_platform_tag()
 #else /* !_WIN32 */
 #ifdef __APPLE__
    return "osx";
-#else 
+#else /* !__APPLE__ */
 #ifdef __LINUX__
    if (sizeof(long) == 8) {
        return "linux64";
    } else {
        return "linux";
    }
-#else 
+#else /* !__LINUX__ */
    return "posix";
-#endif 
-#endif 
+#endif /* !__LINUX__ */
+#endif /* !__APPLE__ */
 #endif /* !_WIN32 */
 }

@@ -192,6 +208,30 @@ void clock_gettime(int, struct timespec *ts)

 void system_specific_initialise()
 {
+    // Per-platform floating-point set-up, selected at compile time
+#if defined HAVE_IPP
+#ifndef USE_IPP_DYNAMIC_LIBS
+//    std::cerr << "Calling ippStaticInit" << std::endl;
+    ippStaticInit();
+#endif
+    ippSetDenormAreZeros(1);
+#elif defined HAVE_VDSP
+#if defined __i386__ || defined __x86_64__ 
+    fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV);
+#endif
+#endif
+#if defined __ARMEL__
+    // Read FPSCR, mask it with x, OR in y (bits 24 and 25: the
+    // flush-to-zero and default-NaN controls in the ARM VFP
+    // architecture) and write it back
+    static const unsigned int x = 0x04086060;
+    static const unsigned int y = 0x03000000;
+    int r;
+    asm volatile (
+        "fmrx	%0, fpscr   \n\t"
+        "and	%0, %0, %1  \n\t"
+        "orr	%0, %0, %2  \n\t"
+        "fmxr	fpscr, %0   \n\t"
+        // %0 is written by the first instruction, before %1 and %2
+        // are read, so it must be an early-clobber output; otherwise
+        // the compiler may give it the same register as an input
+        : "=&r"(r)
+        : "r"(x), "r"(y)
+        );
+#endif
 }

 void system_specific_application_initialise()
@@ -226,9 +266,13 @@ system_get_process_status(int pid)
 #ifdef _WIN32
 void system_memorybarrier()
 {
+#ifdef __MSVC__ // NOTE(review): __MSVC__ is not predefined by the Microsoft compiler (that is _MSC_VER); presumably set by the build -- confirm
+    MemoryBarrier();
+#else /* (mingw) */
    LONG Barrier = 0;
    __asm__ __volatile__("xchgl %%eax,%0 "
                         : "=r" (Barrier));
+#endif
 }
 #else /* !_WIN32 */
 #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
--- a/src/system/sysutils.h
+++ b/src/system/sysutils.h
@@ -1,20 +1,33 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_SYSUTILS_H_
 #define _RUBBERBAND_SYSUTILS_H_

+#ifdef __MSVC__ // NOTE(review): not a compiler-predefined macro; presumably supplied by the MSVC project settings -- confirm
+#include "float_cast/float_cast.h"
+#define R__ __restrict // MSVC spelling of the restrict qualifier
+#endif

 #ifdef __GNUC__
 #define R__ __restrict__
@@ -27,11 +40,26 @@
 #ifdef __MINGW32__
 #include <malloc.h>
 #else
+#ifndef __MSVC__
 #include <alloca.h>
 #endif
+#endif

+#ifdef __MSVC__ // map the POSIX-style names used in the code onto the underscore-prefixed names
+#include <malloc.h>
+#include <process.h>
+#define alloca _alloca
+#define getpid _getpid
+#endif

+#ifdef __MSVC__ // MSVC builds skip <stdint.h> and get macro substitutes instead
+#define uint8_t unsigned __int8
+#define uint16_t unsigned __int16
+#define uint32_t unsigned __int32
+#define ssize_t long // NOTE(review): long is 32 bits on 64-bit Windows -- confirm that is wide enough for callers
+#else
 #include <stdint.h>
+#endif

 #include <math.h>

@@ -49,6 +77,7 @@ extern ProcessStatus system_get_process_status(int pid);
 struct timespec { long tv_sec; long tv_nsec; };
 void clock_gettime(int clk_id, struct timespec *p);
 #define CLOCK_MONOTONIC 1
+#define CLOCK_REALTIME 2
 #endif

 #ifdef _WIN32
@@ -60,9 +89,15 @@ struct timespec { long tv_sec; long tv_nsec; };
 // always uses GetPerformanceCounter, does not check whether it's valid or not:
 void clock_gettime(int clk_id, struct timespec *p);
 #define CLOCK_MONOTONIC 1
+#define CLOCK_REALTIME 2

 #endif

+#ifdef __MSVC__ // declaration only for MSVC builds
+
+void usleep(unsigned long); // presumably defined in sysutils.cpp for this platform -- confirm
+
+#endif

 inline double mod(double x, double y) { return x - (y * floor(x / y)); } // remainder of x after floored division by y
 inline float modf(float x, float y) { return x - (y * float(floor(x / y))); } // float version; the floor itself is computed by floor(), then narrowed
@@ -125,5 +160,9 @@ extern void system_memorybarrier();

 #endif

+#ifdef NO_THREADING // builds without threading: MBARRIER() expands to nothing
+#undef MBARRIER
+#define MBARRIER() 
+#endif

 #endif