Update to new combined build

2012-09-09 16:57:42 +01:00
parent 4ecb1fa6f1
commit 93c38b50a0
77 changed files with 10427 additions and 897 deletions
--- a/src/system/Allocators.cpp
+++ b/src/system/Allocators.cpp
@@ -1,19 +1,31 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "Allocators.h"

+#ifdef HAVE_IPP
+#include <ipps.h>
+#endif

 #include <iostream>
 using std::cerr;
@@ -21,6 +33,37 @@ using std::endl;

 namespace RubberBand {

+#ifdef HAVE_IPP
+
+template <>
+float *allocate(size_t count)
+{
+    float *ptr = ippsMalloc_32f(count);
+    if (!ptr) throw (std::bad_alloc());
+    return ptr;
+}
+
+template <>
+double *allocate(size_t count)
+{
+    double *ptr = ippsMalloc_64f(count);
+    if (!ptr) throw (std::bad_alloc());
+    return ptr;
+}
+
+// IPP build: release a float buffer through ippsFree, matching the
+// ippsMalloc_32f call in the allocate specialisation. Null is ignored.
+template <>
+void deallocate(float *ptr)
+{
+    if (ptr) ippsFree((void *)ptr);
+}
+
+// IPP build: release a double buffer through ippsFree, matching the
+// ippsMalloc_64f call in the allocate specialisation. Null is ignored.
+template <>
+void deallocate(double *ptr)
+{
+    if (ptr) ippsFree((void *)ptr);
+}
+
+#endif

 }

--- a/src/system/Allocators.h
+++ b/src/system/Allocators.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_ALLOCATORS_H_
@@ -34,6 +43,9 @@
 #include <sys/mman.h>
 #endif

+// Empty stand-in for std::bad_alloc, compiled only when the build
+// defines LACK_BAD_ALLOC (presumably for toolchains whose library does
+// not provide the real type -- confirm before enabling elsewhere).
+#ifdef LACK_BAD_ALLOC
+namespace std { struct bad_alloc { }; }
+#endif

 namespace RubberBand {

@@ -41,22 +53,55 @@ template <typename T>
 T *allocate(size_t count)
 {
+    // Returns raw storage for count objects of type T (no constructors
+    // are run), using the first allocator selected by the build macros
+    // below. If no memory is obtained, throws std::bad_alloc, or calls
+    // abort() when NO_EXCEPTIONS is defined.
    void *ptr = 0;
+    // 32-byte alignment is required for at least OpenMAX
+    static const int alignment = 32;
+#ifdef USE_OWN_ALIGNED_MALLOC
+    // Alignment must be a power of two, bigger than the pointer
+    // size. Stuff the actual malloc'd pointer in just before the
+    // returned value.  This is the least desirable way to do this --
+    // the other options below are all better
+    size_t allocd = count * sizeof(T) + alignment;
+    void *buf = malloc(allocd);
+    if (buf) {
+        char *adj = (char *)buf;
+        // Round down to an alignment boundary, then step forward by
+        // one full alignment unit; deallocate() reads the original
+        // malloc pointer back from the slot just before ptr
+        while ((unsigned long long)adj & (alignment-1)) --adj;
+        ptr = ((char *)adj) + alignment;
+        ((void **)ptr)[-1] = buf;
+    }
+#else /* !USE_OWN_ALIGNED_MALLOC */
 #ifdef HAVE_POSIX_MEMALIGN
-    if (posix_memalign(&ptr, 16, count * sizeof(T))) {
+    // A non-zero return means posix_memalign failed: fall back to
+    // plain malloc, which makes no explicit alignment request
+    if (posix_memalign(&ptr, alignment, count * sizeof(T))) {
        ptr = malloc(count * sizeof(T));
    }
-#else 
-    // Note that malloc always aligns to 16 byte boundaries on OS/X,
-    // so we don't need posix_memalign there (which is fortunate,
-    // since it doesn't exist)
+#else /* !HAVE_POSIX_MEMALIGN */
+    // NOTE(review): __MSVC__ is presumably supplied by the build
+    // system; VectorOps.h tests _MSC_VER instead -- confirm which
+    // macro the MSVC project files actually define
+#ifdef __MSVC__
+    ptr = _aligned_malloc(count * sizeof(T), alignment);
+#else /* !__MSVC__ */
+#warning "No aligned malloc available or defined"
+    // Note that malloc always aligns to 16 byte boundaries on OS/X
    ptr = malloc(count * sizeof(T));
-#endif 
+#endif /* !__MSVC__ */
+#endif /* !HAVE_POSIX_MEMALIGN */
+#endif /* !USE_OWN_ALIGNED_MALLOC */
    if (!ptr) {
+#ifndef NO_EXCEPTIONS
        throw(std::bad_alloc());
+#else
+        abort();
+#endif
    }
    return (T *)ptr;
 }

+#ifdef HAVE_IPP
+
+// Declarations of the float and double specialisations of allocate()
+// that are defined in Allocators.cpp when building with IPP. Declaring
+// them here keeps the generic template from being instantiated for
+// these types.
+template <>
+float *allocate(size_t count);
+
+template <>
+double *allocate(size_t count);
+
+#endif
 	
 template <typename T>
 T *allocate_and_zero(size_t count)
@@ -69,9 +114,26 @@ T *allocate_and_zero(size_t count)
 template <typename T>
 void deallocate(T *ptr)
 {
+    // Releases storage obtained from allocate(); a null ptr is
+    // ignored. Each branch mirrors the allocator that allocate() uses
+    // under the same build macro.
+#ifdef USE_OWN_ALIGNED_MALLOC
+    // allocate() stored the pointer originally returned by malloc in
+    // the slot immediately before ptr
+    if (ptr) free(((void **)ptr)[-1]);
+#else /* !USE_OWN_ALIGNED_MALLOC */
+#ifdef __MSVC__
+    if (ptr) _aligned_free((void *)ptr);
+#else /* !__MSVC__ */
    if (ptr) free((void *)ptr);
+#endif /* !__MSVC__ */
+#endif /* !USE_OWN_ALIGNED_MALLOC */
 }

+#ifdef HAVE_IPP
+
+// Declarations of the float and double specialisations of
+// deallocate() that are defined in Allocators.cpp when building with
+// IPP.
+template <>
+void deallocate(float *);
+
+template <>
+void deallocate(double *);
+
+#endif

 /// Reallocate preserving contents but leaving additional memory uninitialised	
 template <typename T>
@@ -159,6 +221,17 @@ T **reallocate_and_zero_extend_channels(T **ptr,
    return newptr;
 }

+/// RAII class to call deallocate() on destruction.
+/// Takes ownership of the pointer passed to the constructor (which
+/// may be null) and passes it to deallocate<T>() when the object goes
+/// out of scope. Not copyable, because a copy would hand the same
+/// pointer to deallocate() a second time.
+template <typename T>
+class Deallocator
+{
+public:
+    Deallocator(T *t) : m_t(t) { }
+    ~Deallocator() { deallocate<T>(m_t); }
+private:
+    // Declared but not defined: copying is disallowed (see above)
+    Deallocator(const Deallocator &);
+    Deallocator &operator=(const Deallocator &);
+    T *m_t;
+};
+
 }

 #endif
--- a/src/system/Thread.cpp
+++ b/src/system/Thread.cpp
@@ -1,25 +1,37 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

+#ifndef NO_THREADING

 #include "Thread.h"

 #include <iostream>
 #include <cstdlib>

+#ifdef USE_PTHREADS
 #include <sys/time.h>
 #include <time.h>
+#endif

 using std::cerr;
 using std::endl;
@@ -280,6 +292,7 @@ Condition::signal()

 #else /* !_WIN32 */

+#ifdef USE_PTHREADS

 Thread::Thread() :
    m_id(0),
@@ -541,6 +554,93 @@ Condition::signal()
    pthread_cond_signal(&m_condition);
 }

+#else /* !USE_PTHREADS */
+
+// Stub implementations, compiled when neither _WIN32 nor USE_PTHREADS
+// is defined. Constructors and destructors do nothing,
+// Thread::threadingAvailable() reports false, and every operation
+// that would need a real thread, mutex or condition calls abort().
+
+Thread::Thread()
+{
+}
+
+Thread::~Thread()
+{
+}
+
+void
+Thread::start()
+{
+    abort();
+}    
+
+void 
+Thread::wait()
+{
+    abort();
+}
+
+// No return statement is needed: abort() does not return
+Thread::Id
+Thread::id()
+{
+    abort();
+}
+
+bool
+Thread::threadingAvailable()
+{
+    return false;
+}
+
+Mutex::Mutex()
+{
+}
+
+Mutex::~Mutex()
+{
+}
+
+void
+Mutex::lock()
+{
+    abort();
+}
+
+void
+Mutex::unlock()
+{
+    abort();
+}
+
+// No return statement is needed: abort() does not return
+bool
+Mutex::trylock()
+{
+    abort();
+}
+
+Condition::Condition(const char *)
+{
+}
+
+Condition::~Condition()
+{
+}
+
+void
+Condition::lock()
+{
+    abort();
+}
+
+void 
+Condition::wait(int us)
+{
+    abort();
+}
+
+void
+Condition::signal()
+{
+    abort();
+}
+
+#endif /* !USE_PTHREADS */
 #endif /* !_WIN32 */

 MutexLocker::MutexLocker(Mutex *mutex) :
@@ -560,3 +660,4 @@ MutexLocker::~MutexLocker()

 }

+#endif
--- a/src/system/Thread.h
+++ b/src/system/Thread.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_THREAD_H_
@@ -17,11 +26,16 @@

 #include <string>

+#ifndef NO_THREADING

 #ifdef _WIN32
 #include <windows.h>
 #else /* !_WIN32 */
+#ifdef USE_PTHREADS
 #include <pthread.h>
+#else /* !USE_PTHREADS */
+#error No thread implementation selected
+#endif /* !USE_PTHREADS */
 #endif /* !_WIN32 */

 //#define DEBUG_THREAD 1
@@ -37,7 +51,9 @@ public:
 #ifdef _WIN32
    typedef HANDLE Id;
 #else
+#ifdef USE_PTHREADS
    typedef pthread_t Id;
+#endif
 #endif

    Thread();
@@ -59,10 +75,12 @@ private:
    bool m_extant;
    static DWORD WINAPI staticRun(LPVOID lpParam);
 #else
+#ifdef USE_PTHREADS
    pthread_t m_id;
    bool m_extant;
    static void *staticRun(void *);
 #endif
+#endif
 };

 class Mutex
@@ -82,12 +100,14 @@ private:
    DWORD m_lockedBy;
 #endif
 #else
+#ifdef USE_PTHREADS
    pthread_mutex_t m_mutex;
 #ifndef NO_THREAD_CHECKS
    pthread_t m_lockedBy;
    bool m_locked;
 #endif
 #endif
+#endif
 };

 class MutexLocker
@@ -133,10 +153,12 @@ private:
    HANDLE m_condition;
    bool m_locked;
 #else
+#ifdef USE_PTHREADS
    pthread_mutex_t m_mutex;
    pthread_cond_t m_condition;
    bool m_locked;
 #endif
+#endif
 #ifdef DEBUG_CONDITION
    std::string m_name;
 #endif
@@ -144,5 +166,67 @@ private:

 }

+#else
+
+/* Stub threading interface. We do not have threading support in this code. */
+
+namespace RubberBand
+{
+
+/// Stub Thread used when NO_THREADING is defined. start() and wait()
+/// do nothing, so run() is never invoked by this class; id() always
+/// returns 0 and threadingAvailable() always returns false.
+class Thread
+{
+public:
+    typedef unsigned int Id;
+
+    Thread() { }
+    virtual ~Thread() { }
+
+    Id id() { return 0; }
+
+    void start() { } 
+    void wait() { }
+
+    static bool threadingAvailable() { return false; }
+
+protected:
+    /// Must still be implemented by subclasses, although the stub
+    /// never calls it
+    virtual void run() = 0;
+
+private:
+};
+
+/// Stub Mutex used when NO_THREADING is defined: lock() and unlock()
+/// do nothing.
+class Mutex
+{
+public:
+    Mutex() { }
+    ~Mutex() { }
+
+    void lock() { }
+    void unlock() { }
+    // NOTE(review): always returns false. If callers read the result
+    // as "lock acquired", they will see every attempt fail even though
+    // lock() itself is a no-op -- confirm this is the intended value.
+    bool trylock() { return false; }
+};
+
+/// Stub MutexLocker used when NO_THREADING is defined: the mutex
+/// argument is ignored and construction/destruction do nothing.
+class MutexLocker
+{
+public:
+    MutexLocker(Mutex *) { }
+    ~MutexLocker() { }
+};
+
+/// Stub Condition used when NO_THREADING is defined. The name is
+/// ignored and every operation is a no-op; in particular wait()
+/// returns immediately whatever timeout is given.
+class Condition
+{
+public:
+    Condition(std::string name) { }
+    ~Condition() { }
+    
+    void lock() { }
+    void unlock() { }
+    void wait(int us = 0) { }
+
+    void signal() { }
+};
+
+}
+
+#endif /* NO_THREADING */

 #endif
--- a/src/system/VectorOps.h
+++ b/src/system/VectorOps.h
@@ -1,21 +1,41 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_VECTOR_OPS_H_
 #define _RUBBERBAND_VECTOR_OPS_H_

+#ifdef HAVE_IPP
+#ifndef _MSC_VER
+#include <inttypes.h>
+#endif
+#include <ipps.h>
+#include <ippac.h>
+#endif

+#ifdef HAVE_VDSP
+#include <vecLib/vDSP.h>
+#include <vecLib/vForce.h>
+#endif

 #include <cstring>
 #include "sysutils.h"
@@ -40,6 +60,33 @@ inline void v_zero(T *const R__ ptr,
    }
 }

+#if defined HAVE_IPP
+// Specialisations of v_zero for float and double. IPP (ippsZero_*) is
+// used when HAVE_IPP is defined; otherwise vDSP (vDSP_vclr*) is used
+// when HAVE_VDSP is defined. The vDSP calls pass a stride of 1.
+template<> 
+inline void v_zero(float *const R__ ptr, 
+                   const int count)
+{
+    ippsZero_32f(ptr, count);
+}
+template<> 
+inline void v_zero(double *const R__ ptr,
+                   const int count)
+{
+    ippsZero_64f(ptr, count);
+}
+#elif defined HAVE_VDSP
+template<> 
+inline void v_zero(float *const R__ ptr, 
+                   const int count)
+{
+    vDSP_vclr(ptr, 1, count);
+}
+template<> 
+inline void v_zero(double *const R__ ptr,
+                   const int count)
+{
+    vDSP_vclrD(ptr, 1, count);
+}
+#endif

 template<typename T>
 inline void v_zero_channels(T *const R__ *const R__ ptr,
@@ -71,6 +118,22 @@ inline void v_copy(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP specialisations of v_copy for float and double. Note that the
+// IPP calls take (src, dst), the reverse of this function's
+// (dst, src) parameter order.
+template<>
+inline void v_copy(float *const R__ dst,
+                   const float *const R__ src,
+                   const int count)
+{
+    ippsCopy_32f(src, dst, count);
+}
+template<>
+inline void v_copy(double *const R__ dst,
+                   const double *const R__ src,
+                   const int count)
+{
+    ippsCopy_64f(src, dst, count);
+}
+#endif

 template<typename T>
 inline void v_copy_channels(T *const R__ *const R__ dst,
@@ -92,6 +155,22 @@ inline void v_move(T *const dst,
    memmove(dst, src, count * sizeof(T));
 }

+#if defined HAVE_IPP
+// IPP specialisations of v_move for float and double, replacing the
+// generic memmove-based version. As there, dst and src are not marked
+// R__. The IPP calls take (src, dst), the reverse of this function's
+// parameter order.
+template<>
+inline void v_move(float *const dst,
+                   const float *const src,
+                   const int count)
+{
+    ippsMove_32f(src, dst, count);
+}
+template<>
+inline void v_move(double *const dst,
+                   const double *const src,
+                   const int count)
+{
+    ippsMove_64f(src, dst, count);
+}
+#endif

 template<typename T, typename U>
 inline void v_convert(U *const R__ dst,
@@ -118,6 +197,37 @@ inline void v_convert(double *const R__ dst,
    v_copy(dst, src, count);
 }

+#if defined HAVE_IPP
+// Specialisations of v_convert between float and double buffers. IPP
+// is used when HAVE_IPP is defined; otherwise vDSP when HAVE_VDSP is
+// defined. Both libraries take the source first, the reverse of this
+// function's (dst, src) parameter order.
+template<>
+inline void v_convert(double *const R__ dst,
+                      const float *const R__ src,
+                      const int count)
+{
+    ippsConvert_32f64f(src, dst, count);
+}
+template<>
+inline void v_convert(float *const R__ dst,
+                      const double *const R__ src,
+                      const int count)
+{
+    ippsConvert_64f32f(src, dst, count);
+}
+#elif defined HAVE_VDSP
+// The casts below drop const from src to match the vDSP parameter
+// types (presumably the vDSP headers in use declare the input as
+// non-const -- confirm); strides are 1 for both vectors.
+template<>
+inline void v_convert(double *const R__ dst,
+                      const float *const R__ src,
+                      const int count)
+{
+    vDSP_vspdp((float *)src, 1, dst, 1, count);
+}
+template<>
+inline void v_convert(float *const R__ dst,
+                      const double *const R__ src,
+                      const int count)
+{
+    vDSP_vdpsp((double *)src, 1, dst, 1, count);
+}
+#endif

 template<typename T, typename U>
 inline void v_convert_channels(U *const R__ *const R__ dst,
@@ -150,6 +260,21 @@ inline void v_add(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP specialisations of v_add for float and double: src is added
+// into dst in place using ippsAdd_*_I, which takes (src, dst). Both
+// are explicit specialisations of the generic template, so that calls
+// naming the template argument (v_add<double>(...)) also reach the
+// IPP version.
+template<>
+inline void v_add(float *const R__ dst,
+                  const float *const R__ src,
+                  const int count)
+{
+    ippsAdd_32f_I(src, dst, count);
+}
+template<>
+inline void v_add(double *const R__ dst,
+                  const double *const R__ src,
+                  const int count)
+{
+    ippsAdd_64f_I(src, dst, count);
+}
+#endif

 template<typename T>
 inline void v_add_channels(T *const R__ *const R__ dst,
@@ -194,6 +319,21 @@ inline void v_subtract(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP specialisations of v_subtract for float and double: src is
+// subtracted from dst in place using ippsSub_*_I, which takes
+// (src, dst). Both are explicit specialisations of the generic
+// template, so that calls naming the template argument
+// (v_subtract<double>(...)) also reach the IPP version.
+template<>
+inline void v_subtract(float *const R__ dst,
+                       const float *const R__ src,
+                       const int count)
+{
+    ippsSub_32f_I(src, dst, count);
+}
+template<>
+inline void v_subtract(double *const R__ dst,
+                       const double *const R__ src,
+                       const int count)
+{
+    ippsSub_64f_I(src, dst, count);
+}
+#endif

 template<typename T, typename G>
 inline void v_scale(T *const R__ dst,
@@ -205,6 +345,22 @@ inline void v_scale(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP 
+// IPP specialisations of v_scale for the cases where the buffer and
+// the gain have the same type (float/float, double/double). Other
+// combinations continue to use the generic template. The IPP calls
+// operate on dst in place.
+template<>
+inline void v_scale(float *const R__ dst,
+                    const float gain,
+                    const int count)
+{
+    ippsMulC_32f_I(gain, dst, count);
+}
+template<>
+inline void v_scale(double *const R__ dst,
+                    const double gain,
+                    const int count)
+{
+    ippsMulC_64f_I(gain, dst, count);
+}
+#endif

 template<typename T>
 inline void v_multiply(T *const R__ dst,
@@ -216,6 +372,22 @@ inline void v_multiply(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP 
+// IPP specialisations of the two-operand v_multiply (dst is both an
+// input and the output) for float and double. The IPP calls take
+// (src, dst).
+template<>
+inline void v_multiply(float *const R__ dst,
+                       const float *const R__ src,
+                       const int count)
+{
+    ippsMul_32f_I(src, dst, count);
+}
+template<>
+inline void v_multiply(double *const R__ dst,
+                       const double *const R__ src,
+                       const int count)
+{
+    ippsMul_64f_I(src, dst, count);
+}
+#endif

 template<typename T>
 inline void v_multiply(T *const R__ dst,
@@ -238,7 +410,41 @@ inline void v_divide(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP 
+// IPP specialisations of v_divide for float and double, operating on
+// dst in place with src as the other operand. The IPP calls take
+// (src, dst).
+template<>
+inline void v_divide(float *const R__ dst,
+                     const float *const R__ src,
+                     const int count)
+{
+    ippsDiv_32f_I(src, dst, count);
+}
+template<>
+inline void v_divide(double *const R__ dst,
+                     const double *const R__ src,
+                     const int count)
+{
+    ippsDiv_64f_I(src, dst, count);
+}
+#endif

+#if defined HAVE_IPP 
+// IPP specialisations of the three-operand v_multiply (dst receives
+// the product of src1 and src2) for float and double. The IPP calls
+// take the two sources first and the destination after them.
+template<>
+inline void v_multiply(float *const R__ dst,
+                       const float *const R__ src1,
+                       const float *const R__ src2,
+                       const int count)
+{
+    ippsMul_32f(src1, src2, dst, count);
+}    
+template<>
+inline void v_multiply(double *const R__ dst,
+                       const double *const R__ src1,
+                       const double *const R__ src2,
+                       const int count)
+{
+    ippsMul_64f(src1, src2, dst, count);
+}
+#endif

 template<typename T>
 inline void v_multiply_and_add(T *const R__ dst,
@@ -251,6 +457,24 @@ inline void v_multiply_and_add(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP specialisations of v_multiply_and_add for float and double,
+// using ippsAddProduct_* with the two sources first and dst (the
+// accumulator) after them.
+template<>
+inline void v_multiply_and_add(float *const R__ dst,
+                               const float *const R__ src1,
+                               const float *const R__ src2,
+                               const int count)
+{
+    ippsAddProduct_32f(src1, src2, dst, count);
+}
+template<>
+inline void v_multiply_and_add(double *const R__ dst,
+                               const double *const R__ src1,
+                               const double *const R__ src2,
+                               const int count)
+{
+    ippsAddProduct_64f(src1, src2, dst, count);
+}
+#endif

 template<typename T>
 inline T v_sum(const T *const R__ src,
@@ -272,6 +496,41 @@ inline void v_log(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// Specialisations of v_log for float and double, operating on dst in
+// place: IPP natural-log functions when HAVE_IPP is defined,
+// otherwise vForce when HAVE_VDSP is defined.
+template<>
+inline void v_log(float *const R__ dst,
+                  const int count)
+{
+    ippsLn_32f_I(dst, count);
+}
+template<>
+inline void v_log(double *const R__ dst,
+                  const int count)
+{
+    ippsLn_64f_I(dst, count);
+}
+#elif defined HAVE_VDSP
+// no in-place vForce functions for these -- can we use the
+// out-of-place functions with equal input and output vectors? can we
+// use an out-of-place one with temporary buffer and still be faster
+// than doing it any other way?
+//
+// The versions below write the result to a temporary and copy it back.
+// NOTE(review): tmp is a variable-length array, which is a compiler
+// extension in C++ and uses stack space proportional to count --
+// confirm this is acceptable for the largest count passed in.
+template<>
+inline void v_log(float *const R__ dst,
+                  const int count)
+{
+    float tmp[count];
+    vvlogf(tmp, dst, &count);
+    v_copy(dst, tmp, count);
+}
+template<>
+inline void v_log(double *const R__ dst,
+                  const int count)
+{
+    double tmp[count];
+    vvlog(tmp, dst, &count);
+    v_copy(dst, tmp, count);
+}
+#endif

 template<typename T>
 inline void v_exp(T *const R__ dst,
@@ -282,6 +541,41 @@ inline void v_exp(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// Specialisations of v_exp for float and double, operating on dst in
+// place: IPP when HAVE_IPP is defined, otherwise vForce when
+// HAVE_VDSP is defined.
+template<>
+inline void v_exp(float *const R__ dst,
+                  const int count)
+{
+    ippsExp_32f_I(dst, count);
+}
+template<>
+inline void v_exp(double *const R__ dst,
+                  const int count)
+{
+    ippsExp_64f_I(dst, count);
+}
+#elif defined HAVE_VDSP
+// no in-place vForce functions for these -- can we use the
+// out-of-place functions with equal input and output vectors? can we
+// use an out-of-place one with temporary buffer and still be faster
+// than doing it any other way?
+//
+// The versions below write the result to a temporary and copy it back.
+// NOTE(review): tmp is a variable-length array, which is a compiler
+// extension in C++ and uses stack space proportional to count --
+// confirm this is acceptable for the largest count passed in.
+template<>
+inline void v_exp(float *const R__ dst,
+                  const int count)
+{
+    float tmp[count];
+    vvexpf(tmp, dst, &count);
+    v_copy(dst, tmp, count);
+}
+template<>
+inline void v_exp(double *const R__ dst,
+                  const int count)
+{
+    double tmp[count];
+    vvexp(tmp, dst, &count);
+    v_copy(dst, tmp, count);
+}
+#endif

 template<typename T>
 inline void v_sqrt(T *const R__ dst,
@@ -292,6 +586,41 @@ inline void v_sqrt(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// Specialisations of v_sqrt for float and double, operating on dst in
+// place: IPP when HAVE_IPP is defined, otherwise vForce when
+// HAVE_VDSP is defined.
+template<>
+inline void v_sqrt(float *const R__ dst,
+                   const int count)
+{
+    ippsSqrt_32f_I(dst, count);
+}
+template<>
+inline void v_sqrt(double *const R__ dst,
+                   const int count)
+{
+    ippsSqrt_64f_I(dst, count);
+}
+#elif defined HAVE_VDSP
+// no in-place vForce functions for these -- can we use the
+// out-of-place functions with equal input and output vectors? can we
+// use an out-of-place one with temporary buffer and still be faster
+// than doing it any other way?
+//
+// The versions below write the result to a temporary and copy it back.
+// NOTE(review): tmp is a variable-length array, which is a compiler
+// extension in C++ and uses stack space proportional to count --
+// confirm this is acceptable for the largest count passed in.
+template<>
+inline void v_sqrt(float *const R__ dst,
+                   const int count)
+{
+    float tmp[count];
+    vvsqrtf(tmp, dst, &count);
+    v_copy(dst, tmp, count);
+}
+template<>
+inline void v_sqrt(double *const R__ dst,
+                   const int count)
+{
+    double tmp[count];
+    vvsqrt(tmp, dst, &count);
+    v_copy(dst, tmp, count);
+}
+#endif

 template<typename T>
 inline void v_square(T *const R__ dst,
@@ -302,6 +631,20 @@ inline void v_square(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP specialisations of v_square for float and double, operating on
+// dst in place via ippsSqr_*_I.
+template<>
+inline void v_square(float *const R__ dst,
+                   const int count)
+{
+    ippsSqr_32f_I(dst, count);
+}
+template<>
+inline void v_square(double *const R__ dst,
+                   const int count)
+{
+    ippsSqr_64f_I(dst, count);
+}
+#endif

 template<typename T>
 inline void v_abs(T *const R__ dst,
@@ -312,6 +655,29 @@ inline void v_abs(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// Specialisations of v_abs operating on dst in place. IPP provides
+// float and double versions; the vDSP branch provides float only, so
+// double falls back to the generic template there.
+template<>
+inline void v_abs(float *const R__ dst,
+                  const int count)
+{
+    ippsAbs_32f_I(dst, count);
+}
+template<>
+inline void v_abs(double *const R__ dst,
+                  const int count)
+{
+    ippsAbs_64f_I(dst, count);
+}
+#elif defined HAVE_VDSP
+// Writes the result to a temporary and copies it back.
+// NOTE(review): tmp is a variable-length array, which is a compiler
+// extension in C++ and uses stack space proportional to count --
+// confirm this is acceptable for the largest count passed in.
+template<>
+inline void v_abs(float *const R__ dst,
+                  const int count)
+{
+    float tmp[count];
+    vvfabf(tmp, dst, &count);
+    v_copy(dst, tmp, count);
+}
+#endif

 template<typename T>
 inline void v_interleave(T *const R__ dst,
@@ -341,6 +707,17 @@ inline void v_interleave(T *const R__ dst,
    }
 }

+#if defined HAVE_IPP 
+// IPP specialisation of v_interleave for float. The IPP call takes
+// (src, channels, count, dst), so dst moves from first to last
+// argument; src is cast to the pointer type IPP expects.
+template<>
+inline void v_interleave(float *const R__ dst,
+                         const float *const R__ *const R__ src,
+                         const int channels, 
+                         const int count)
+{
+    ippsInterleave_32f((const Ipp32f **)src, channels, count, dst);
+}
+// IPP does not (currently?) provide double-precision interleave
+#endif

 template<typename T>
 inline void v_deinterleave(T *const R__ *const R__ dst,
@@ -370,6 +747,17 @@ inline void v_deinterleave(T *const R__ *const R__ dst,
    }
 }

+#if defined HAVE_IPP
+// IPP specialisation of v_deinterleave for float. The IPP call takes
+// (src, channels, count, dst), so dst moves from first to last
+// argument; both pointers are cast to the types IPP expects.
+template<>
+inline void v_deinterleave(float *const R__ *const R__ dst,
+                           const float *const R__ src,
+                           const int channels, 
+                           const int count)
+{
+    ippsDeinterleave_32f((const Ipp32f *)src, channels, count, (Ipp32f **)dst);
+}
+// IPP does not (currently?) provide double-precision deinterleave
+#endif

 template<typename T>
 inline void v_fftshift(T *const R__ ptr,
--- a/src/system/VectorOpsComplex.cpp
+++ b/src/system/VectorOpsComplex.cpp
@@ -1,24 +1,198 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "VectorOpsComplex.h"

 #include "system/sysutils.h"

+#include <cassert>
+
+#if defined USE_POMMIER_MATHFUN
+#if defined __ARMEL__
+#include "pommier/neon_mathfun.h"
+#else
+#include "pommier/sse_mathfun.h"
+#endif
+#endif

 namespace RubberBand {

+#ifdef USE_APPROXIMATE_ATAN2
+/**
+ * Fast approximation to atan2f. Note that the arguments are in the
+ * order (real, imag), i.e. (x, y), the reverse of atan2f(y, x).
+ *
+ * Returns the approximate angle in radians of the point (real, imag).
+ * On the imaginary axis the result is exactly +/- pi/2 (0 at the
+ * origin); elsewhere a rational approximation in z = imag/real is
+ * used, followed by a quadrant correction.
+ */
+float approximate_atan2f(float real, float imag)
+{
+    static const float pi = M_PI;
+    static const float pi2 = M_PI / 2;
+
+    float atan;
+
+    if (real == 0.f) {
+
+        if (imag > 0.0f) atan = pi2;
+        else if (imag == 0.0f) atan = 0.0f;
+        else atan = -pi2;
+
+    } else {
+
+        float z = imag/real;
+
+        if (fabsf(z) < 1.f) {
+            // |z| < 1: atan(z) ~= z / (1 + 0.28 z^2), then move into
+            // the left half-plane if real is negative
+            atan = z / (1.f + 0.28f * z * z);
+            if (real < 0.f) {
+                if (imag < 0.f) atan -= pi;
+                else atan += pi;
+            }
+        } else {
+            // |z| >= 1: atan(z) ~= pi/2 - z / (z^2 + 0.28), then move
+            // into the lower half-plane if imag is negative
+            atan = pi2 - z / (z * z + 0.28f);
+            if (imag < 0.f) atan -= pi;
+        }
+    }
+
+    // The result used to be computed but never returned
+    return atan;
+}
+#endif
+
+#if defined USE_POMMIER_MATHFUN
+
+// Union giving per-lane float and int access to a v4sf vector of four
+// floats (v4sf comes from the pommier mathfun header included above).
+// The non-ARM variant is wrapped in the ALIGN16_BEG / ALIGN16_END
+// macros (presumably provided by sse_mathfun.h -- confirm).
+#ifdef __ARMEL__
+typedef union {
+  float f[4];
+  int i[4];
+  v4sf  v;
+} V4SF;
+#else
+typedef ALIGN16_BEG union {
+  float f[4];
+  int i[4];
+  v4sf  v;
+} ALIGN16_END V4SF;
+#endif
+
+/**
+ * Convert count polar values (mag[i], phase[i]) to cartesian form,
+ * writing real[i] = mag[i] * cos(phase[i]) and imag[i] = mag[i] *
+ * sin(phase[i]).
+ *
+ * Values are processed four at a time with sincos_ps; whatever
+ * remains is handled one at a time with c_phasor.
+ */
+void
+v_polar_to_cartesian_pommier(float *const R__ real,
+                             float *const R__ imag,
+                             const float *const R__ mag,
+                             const float *const R__ phase,
+                             const int count)
+{
+    // Shared by both loops, so the scalar tail carries on from where
+    // the vector loop stopped
+    int i = 0;
+
+    for (; i + 4 < count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        // Fill all four lanes
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = mag[i + j];
+            fphase.f[j] = phase[i + j];
+        }
+
+        // sincos_ps yields sine first, then cosine
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            real[i + j] = fre.f[j] * fmag.f[j];
+            imag[i + j] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    while (i < count) {
+        float re, im;
+        c_phasor(&re, &im, phase[i]);
+        real[i] = re * mag[i];
+        imag[i] = im * mag[i];
+        ++i;
+    }
+}
+
+/**
+ * Convert count interleaved polar pairs (mag, phase, mag, phase, ...)
+ * in srcdst to interleaved cartesian pairs (real, imag, real, imag,
+ * ...), in place. srcdst holds 2 * count floats.
+ *
+ * Pairs are processed four at a time with sincos_ps; whatever remains
+ * is handled one at a time with c_phasor. Each group of four is read
+ * completely before any of its results are written back.
+ */
+void
+v_polar_interleaved_to_cartesian_inplace_pommier(float *const R__ srcdst,
+                                                 const int count)
+{
+    int i;
+    // Read and write positions within srcdst; both advance by two
+    // floats per pair, so they stay equal to 2 * i between iterations
+    int idx = 0, tidx = 0;
+
+    for (i = 0; i + 4 < count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        // Fill all four lanes
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = srcdst[idx++];
+            fphase.f[j] = srcdst[idx++];
+        }
+
+        // sincos_ps yields sine first, then cosine
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            srcdst[tidx++] = fre.f[j] * fmag.f[j];
+            srcdst[tidx++] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    while (i < count) {
+        float real, imag;
+        float mag = srcdst[idx++];
+        float phase = srcdst[idx++];
+        c_phasor(&real, &imag, phase);
+        srcdst[tidx++] = real * mag;
+        srcdst[tidx++] = imag * mag;
+        ++i;
+    }
+}
+
+/**
+ * Convert count polar values (mag[i], phase[i]) to interleaved
+ * cartesian pairs, writing dst[2*i] = mag[i] * cos(phase[i]) and
+ * dst[2*i+1] = mag[i] * sin(phase[i]). dst holds 2 * count floats.
+ *
+ * Values are processed four at a time with sincos_ps; whatever
+ * remains is handled one at a time with c_phasor.
+ */
+void
+v_polar_to_cartesian_interleaved_pommier(float *const R__ dst,
+                                         const float *const R__ mag,
+                                         const float *const R__ phase,
+                                         const int count)
+{
+    int i;
+    // Write position within dst; advances by two floats per value
+    int tidx = 0;
+
+    for (i = 0; i + 4 <= count; i += 4) {
+
+        V4SF fmag, fphase, fre, fim;
+
+        // Fill all four lanes
+        for (int j = 0; j < 4; ++j) {
+            fmag.f[j] = mag[i + j];
+            fphase.f[j] = phase[i + j];
+        }
+
+        // sincos_ps yields sine first, then cosine
+        sincos_ps(fphase.v, &fim.v, &fre.v);
+
+        for (int j = 0; j < 4; ++j) {
+            dst[tidx++] = fre.f[j] * fmag.f[j];
+            dst[tidx++] = fim.f[j] * fmag.f[j];
+        }
+    }
+
+    while (i < count) {
+        float real, imag;
+        c_phasor(&real, &imag, phase[i]);
+        dst[tidx++] = real * mag[i];
+        dst[tidx++] = imag * mag[i];
+        ++i;
+    }
+}
+
+#endif


 }
--- a/src/system/VectorOpsComplex.h
+++ b/src/system/VectorOpsComplex.h
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_VECTOR_OPS_COMPLEX_H_
@@ -26,7 +35,22 @@ inline void c_phasor(T *real, T *imag, T phase)
 {
    //!!! IPP contains ippsSinCos_xxx in ippvm.h -- these are
    //!!! fixed-accuracy, test and compare
-#if defined __GNUC__
+#if defined HAVE_VDSP
+    int one = 1;
+    if (sizeof(T) == sizeof(float)) {
+        vvsincosf((float *)imag, (float *)real, (const float *)&phase, &one);
+    } else {
+        vvsincos((double *)imag, (double *)real, (const double *)&phase, &one);
+    }
+#elif defined LACK_SINCOS
+    if (sizeof(T) == sizeof(float)) {
+        *real = cosf(phase);
+        *imag = sinf(phase);
+    } else {
+        *real = cos(phase);
+        *imag = sin(phase);
+    }
+#elif defined __GNUC__
    if (sizeof(T) == sizeof(float)) {
        sincosf(phase, (float *)imag, (float *)real);
    } else {
@@ -50,23 +74,35 @@ inline void c_magphase(T *mag, T *phase, T real, T imag)
    *phase = atan2(imag, real);
 }

+#ifdef USE_APPROXIMATE_ATAN2 // float c_magphase takes its phase from approximate_atan2f instead of atan2f
+// NB approximate_atan2f takes (real, imag): the opposite of the (imag, real) order used for atan2f
+extern float approximate_atan2f(float real, float imag); // declaration only; the definition is not part of this header hunk
+template<>
+inline void c_magphase(float *mag, float *phase, float real, float imag) // float specialisation, approximate-phase variant
+{
+    float atan = approximate_atan2f(real, imag); // real first, see NB above
+    *phase = atan;
+    *mag = sqrtf(real * real + imag * imag); // same sqrtf magnitude as the #else variant
+}
+#else // float specialisation using atan2f for the phase
 template<>
 inline void c_magphase(float *mag, float *phase, float real, float imag)
 {
    *mag = sqrtf(real * real + imag * imag);
    *phase = atan2f(imag, real);
 }
+#endif // USE_APPROXIMATE_ATAN2


-template<typename T>
+template<typename S, typename T> // S source, T target
 void v_polar_to_cartesian(T *const R__ real,
                          T *const R__ imag,
-                          T *const R__ mag,
-                          T *const R__ phase,
+                          const S *const R__ mag,
+                          const S *const R__ phase,
                          const int count)
 {
    for (int i = 0; i < count; ++i) {
-        c_phasor(real + i, imag + i, phase[i]);
+        c_phasor<T>(real + i, imag + i, phase[i]);
    }
    v_multiply(real, mag, count);
    v_multiply(imag, mag, count);
@@ -86,29 +122,117 @@ void v_polar_interleaved_to_cartesian_inplace(T *const R__ srcdst,
    }
 }

-template<typename T>
+template<typename S, typename T> // S source element type, T target element type
+void v_polar_to_cartesian_interleaved(T *const R__ dst, // out: 2*count values, real at [2i], imaginary at [2i+1]
+                                      const S *const R__ mag, // in: count magnitudes
+                                      const S *const R__ phase, // in: count phases
+                                      const int count) // number of complex values to convert
+{
+    T real, imag; // per-element scratch, reused on every iteration
+    for (int i = 0; i < count; ++i) {
+        c_phasor<T>(&real, &imag, phase[i]); // phasor for phase[i], evaluated in the target type T
+        real *= mag[i]; // scale both components by the magnitude
+        imag *= mag[i];
+        dst[i*2] = real; // interleave: real part first
+        dst[i*2+1] = imag;
+    }
+}    
+
+#if defined USE_POMMIER_MATHFUN
+void v_polar_to_cartesian_pommier(float *const R__ real, // prototype only: float polar -> separate real/imag arrays
+                                  float *const R__ imag,
+                                  const float *const R__ mag,
+                                  const float *const R__ phase,
+                                  const int count);
+void v_polar_interleaved_to_cartesian_inplace_pommier(float *const R__ srcdst, // prototype only: single in/out buffer
+                                                      const int count);
+void v_polar_to_cartesian_interleaved_pommier(float *const R__ dst, // prototype only: single dst array, separate mag/phase inputs
+                                              const float *const R__ mag,
+                                              const float *const R__ phase,
+                                              const int count);
+
+template<> // all-float specialisation, compiled only inside the USE_POMMIER_MATHFUN region
+inline void v_polar_to_cartesian(float *const R__ real,
+                                 float *const R__ imag,
+                                 const float *const R__ mag,
+                                 const float *const R__ phase,
+                                 const int count)
+{
+    v_polar_to_cartesian_pommier(real, imag, mag, phase, count); // forward every argument unchanged
+}
+
+template<> // float specialisation, compiled only inside the USE_POMMIER_MATHFUN region
+inline void v_polar_interleaved_to_cartesian_inplace(float *const R__ srcdst,
+                                                     const int count)
+{
+    v_polar_interleaved_to_cartesian_inplace_pommier(srcdst, count); // forward every argument unchanged
+}
+
+template<> // all-float specialisation, compiled only inside the USE_POMMIER_MATHFUN region
+inline void v_polar_to_cartesian_interleaved(float *const R__ dst,
+                                             const float *const R__ mag,
+                                             const float *const R__ phase,
+                                             const int count)
+{
+    v_polar_to_cartesian_interleaved_pommier(dst, mag, phase, count); // forward every argument unchanged
+}
+
+#endif
+
+template<typename S, typename T> // S source (real/imag inputs), T target (mag/phase outputs)
 void v_cartesian_to_polar(T *const R__ mag,
                          T *const R__ phase,
-                          T *const R__ real,
-                          T *const R__ imag,
+                          const S *const R__ real, // input, now const and of the source type S
+                          const S *const R__ imag, // input, now const and of the source type S
                          const int count)
 {
    for (int i = 0; i < count; ++i) {
-        c_magphase(mag + i, phase + i, real[i], imag[i]);
+        c_magphase<T>(mag + i, phase + i, real[i], imag[i]); // T given explicitly: S arguments would not deduce the same T
    }
 }

-template<typename T>
+template<typename S, typename T> // S source (interleaved input), T target (mag/phase outputs)
 void v_cartesian_interleaved_to_polar(T *const R__ mag,
                                      T *const R__ phase,
-                                      const T *const R__ src,
+                                      const S *const R__ src, // real at [2i], imaginary at [2i+1]
                                      const int count)
 {
    for (int i = 0; i < count; ++i) {
-        c_magphase(mag + i, phase + i, src[i*2], src[i*2+1]);
+        c_magphase<T>(mag + i, phase + i, src[i*2], src[i*2+1]); // T given explicitly: S arguments would not deduce the same T
    }
 }

+#ifdef HAVE_VDSP
+template<> // all-float specialisation, compiled only under HAVE_VDSP
+inline void v_cartesian_to_polar(float *const R__ mag,
+                                 float *const R__ phase,
+                                 const float *const R__ real,
+                                 const float *const R__ imag,
+                                 const int count)
+{
+    DSPSplitComplex c; // split-complex view over the two separate input arrays
+    c.realp = const_cast<float *>(real); // const cast away to fit the struct; presumably the data is only read -- confirm against vDSP docs
+    c.imagp = const_cast<float *>(imag);
+    vDSP_zvmags(&c, 1, phase, 1, count); // using phase as a temporary dest (squared magnitudes per the vDSP docs)
+    vvsqrtf(mag, phase, &count); // using phase as the source: mag = sqrt of the temporary
+    vvatan2f(phase, imag, real, &count); // phase overwritten last, after the temporary has been consumed
+}
+template<> // all-double specialisation, compiled only under HAVE_VDSP
+inline void v_cartesian_to_polar(double *const R__ mag,
+                                 double *const R__ phase,
+                                 const double *const R__ real,
+                                 const double *const R__ imag,
+                                 const int count)
+{
+    // double precision, this is significantly faster than using vDSP_polar
+    DSPDoubleSplitComplex c; // split-complex view over the two separate input arrays
+    c.realp = const_cast<double *>(real); // const cast away to fit the struct; presumably the data is only read -- confirm against vDSP docs
+    c.imagp = const_cast<double *>(imag);
+    vDSP_zvmagsD(&c, 1, phase, 1, count); // using phase as a temporary dest (squared magnitudes per the vDSP docs)
+    vvsqrt(mag, phase, &count); // using phase as the source: mag = sqrt of the temporary
+    vvatan2(phase, imag, real, &count); // phase overwritten last, after the temporary has been consumed
+}
+#endif

 template<typename T>
 void v_cartesian_to_polar_interleaved_inplace(T *const R__ srcdst,
--- a/src/system/sysutils.cpp
+++ b/src/system/sysutils.cpp
@@ -1,15 +1,24 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #include "sysutils.h"
@@ -38,7 +47,14 @@
 #include <cstdlib>
 #include <iostream>

+#ifdef HAVE_IPP
+#include <ipp.h> // for static init
+#endif

+#ifdef HAVE_VDSP
+#include <vecLib/vDSP.h>
+#include <fenv.h>
+#endif

 #ifdef _WIN32
 #include <fstream>
@@ -55,17 +71,17 @@ system_get_platform_tag()
 #else /* !_WIN32 */
 #ifdef __APPLE__
    return "osx";
-#else 
+#else /* !__APPLE__ */
 #ifdef __LINUX__
    if (sizeof(long) == 8) {
        return "linux64";
    } else {
        return "linux";
    }
-#else 
+#else /* !__LINUX__ */
    return "posix";
-#endif 
-#endif 
+#endif /* !__LINUX__ */
+#endif /* !__APPLE__ */
 #endif /* !_WIN32 */
 }

@@ -192,6 +208,30 @@ void clock_gettime(int, struct timespec *ts)

 void system_specific_initialise()
 {
+#if defined HAVE_IPP // IPP build: optional static init, then denormal setting
+#ifndef USE_IPP_DYNAMIC_LIBS // ippStaticInit is called only when USE_IPP_DYNAMIC_LIBS is not defined
+//    std::cerr << "Calling ippStaticInit" << std::endl;
+    ippStaticInit();
+#endif
+    ippSetDenormAreZeros(1); // presumably makes IPP treat denormals as zero -- confirm against IPP docs
+#elif defined HAVE_VDSP // vDSP build, reached only when HAVE_IPP is not defined
+#if defined __i386__ || defined __x86_64__ // the SSE denormal environment is applied on x86 targets only
+    fesetenv(FE_DFL_DISABLE_SSE_DENORMS_ENV); // install the FP environment named for disabling SSE denormals
+#endif
+#endif
+#if defined __ARMEL__ // separate from the IPP/vDSP chain: applies to any __ARMEL__ build
+    static const unsigned int x = 0x04086060; // AND mask: the FPSCR bits that are kept
+    static const unsigned int y = 0x03000000; // OR mask: sets bits 24 and 25 (presumably flush-to-zero and default-NaN -- confirm against ARM FPSCR docs)
+    int r; // scratch for the read-modify-write of FPSCR
+    asm volatile (
+        "fmrx	%0, fpscr   \n\t" // r = FPSCR
+        "and	%0, %0, %1  \n\t" // r &= x
+        "orr	%0, %0, %2  \n\t" // r |= y
+        "fmxr	fpscr, %0   \n\t" // FPSCR = r
+        : "=r"(r)
+        : "r"(x), "r"(y)
+	);
+#endif
 }

 void system_specific_application_initialise()
@@ -226,9 +266,13 @@ system_get_process_status(int pid)
 #ifdef _WIN32
 void system_memorybarrier()
 {
+#ifdef __MSVC__ // MSVC build: use the MemoryBarrier() call instead of GCC-style inline asm
+    MemoryBarrier();
+#else /* (mingw) keeps the existing inline-asm xchgl barrier */
    LONG Barrier = 0;
    __asm__ __volatile__("xchgl %%eax,%0 "
                         : "=r" (Barrier));
+#endif // __MSVC__
 }
 #else /* !_WIN32 */
 #if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
--- a/src/system/sysutils.h
+++ b/src/system/sysutils.h
@@ -1,20 +1,33 @@
 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

 /*
-    Rubber Band
+    Rubber Band Library
    An audio time-stretching and pitch-shifting library.
-    Copyright 2007-2011 Chris Cannam.
-    
+    Copyright 2007-2012 Particular Programs Ltd.
+
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
+
+    Alternatively, if you have a valid commercial licence for the
+    Rubber Band Library obtained by agreement with the copyright
+    holders, you may redistribute and/or modify it under the terms
+    described in that licence.
+
+    If you wish to distribute code using the Rubber Band Library
+    under terms other than those of the GNU General Public License,
+    you must obtain a valid commercial licence before doing so.
 */

 #ifndef _RUBBERBAND_SYSUTILS_H_
 #define _RUBBERBAND_SYSUTILS_H_

+#ifdef __MSVC__ // MSVC-specific setup
+#include "float_cast/float_cast.h" // project-local header; its contents are not visible in this hunk
+#define R__ __restrict // MSVC spelling; the __GNUC__ branch that follows uses __restrict__
+#endif // __MSVC__

 #ifdef __GNUC__
 #define R__ __restrict__
@@ -27,11 +40,26 @@
 #ifdef __MINGW32__
 #include <malloc.h>
 #else
+#ifndef __MSVC__
 #include <alloca.h>
 #endif
+#endif

+#ifdef __MSVC__ // map the POSIX-style names used in the code onto the underscore-prefixed MSVC ones
+#include <malloc.h> // presumably declares _alloca -- confirm
+#include <process.h> // presumably declares _getpid -- confirm
+#define alloca _alloca // macro alias
+#define getpid _getpid // macro alias
+#endif // __MSVC__

+#ifdef __MSVC__ // <stdint.h> is not included under __MSVC__, so the names used are supplied here
+#define uint8_t unsigned __int8 // macro alias, not a typedef
+#define uint16_t unsigned __int16 // macro alias, not a typedef
+#define uint32_t unsigned __int32 // macro alias, not a typedef
+#define ssize_t long // NOTE(review): long is 32 bits on 64-bit Windows -- confirm that is wide enough for callers
+#else // every other compiler takes the fixed-width types from <stdint.h>
 #include <stdint.h>
+#endif // __MSVC__

 #include <math.h>

@@ -49,6 +77,7 @@ extern ProcessStatus system_get_process_status(int pid);
 struct timespec { long tv_sec; long tv_nsec; };
 void clock_gettime(int clk_id, struct timespec *p);
 #define CLOCK_MONOTONIC 1
+#define CLOCK_REALTIME 2
 #endif

 #ifdef _WIN32
@@ -60,9 +89,15 @@ struct timespec { long tv_sec; long tv_nsec; };
 // always uses GetPerformanceCounter, does not check whether it's valid or not:
 void clock_gettime(int clk_id, struct timespec *p);
 #define CLOCK_MONOTONIC 1
+#define CLOCK_REALTIME 2

 #endif

+#ifdef __MSVC__ // declares usleep for MSVC builds
+
+void usleep(unsigned long); // prototype only: the definition is not part of this hunk; argument presumably microseconds -- confirm
+
+#endif // __MSVC__

 inline double mod(double x, double y) { return x - (y * floor(x / y)); }
 inline float modf(float x, float y) { return x - (y * float(floor(x / y))); }
@@ -125,5 +160,9 @@ extern void system_memorybarrier();

 #endif

+#ifdef NO_THREADING // when NO_THREADING is defined, the barrier macro is replaced by an empty one
+#undef MBARRIER // drop whatever definition was selected earlier in this header
+#define MBARRIER() /* expands to nothing */
+#endif // NO_THREADING

 #endif