Update to new combined build

This commit is contained in:
Chris Cannam
2012-09-09 16:57:42 +01:00
parent 4ecb1fa6f1
commit 93c38b50a0
77 changed files with 10427 additions and 897 deletions

View File

@@ -1,21 +1,41 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2011 Chris Cannam.
Copyright 2007-2012 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#ifndef _RUBBERBAND_VECTOR_OPS_H_
#define _RUBBERBAND_VECTOR_OPS_H_
#ifdef HAVE_IPP
#ifndef _MSC_VER
#include <inttypes.h>
#endif
#include <ipps.h>
#include <ippac.h>
#endif
#ifdef HAVE_VDSP
#include <vecLib/vDSP.h>
#include <vecLib/vForce.h>
#endif
#include <cstring>
#include "sysutils.h"
@@ -40,6 +60,33 @@ inline void v_zero(T *const R__ ptr,
}
}
#if defined HAVE_IPP
// Platform specialisations of v_zero: clear a float or double buffer
// with the vendor-optimised fill routine rather than the generic
// scalar loop above.
template<>
inline void v_zero(float *const R__ vec,
                   const int n)
{
    ippsZero_32f(vec, n);
}
template<>
inline void v_zero(double *const R__ vec,
                   const int n)
{
    ippsZero_64f(vec, n);
}
#elif defined HAVE_VDSP
// vDSP's clear routines take a stride argument; 1 = contiguous data.
template<>
inline void v_zero(float *const R__ vec,
                   const int n)
{
    vDSP_vclr(vec, 1, n);
}
template<>
inline void v_zero(double *const R__ vec,
                   const int n)
{
    vDSP_vclrD(vec, 1, n);
}
#endif
template<typename T>
inline void v_zero_channels(T *const R__ *const R__ ptr,
@@ -71,6 +118,22 @@ inline void v_copy(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisations of v_copy: bulk copy of non-overlapping buffers
// via Intel's optimised routines (note IPP's argument order is
// source first, destination second).
template<>
inline void v_copy(float *const R__ to,
                   const float *const R__ from,
                   const int n)
{
    ippsCopy_32f(from, to, n);
}
template<>
inline void v_copy(double *const R__ to,
                   const double *const R__ from,
                   const int n)
{
    ippsCopy_64f(from, to, n);
}
#endif
template<typename T>
inline void v_copy_channels(T *const R__ *const R__ dst,
@@ -92,6 +155,22 @@ inline void v_move(T *const dst,
memmove(dst, src, count * sizeof(T));
}
#if defined HAVE_IPP
// IPP specialisations of v_move: like v_copy but the buffers may
// overlap, hence no R__ (restrict) qualifier on the pointers.
template<>
inline void v_move(float *const to,
                   const float *const from,
                   const int n)
{
    ippsMove_32f(from, to, n);
}
template<>
inline void v_move(double *const to,
                   const double *const from,
                   const int n)
{
    ippsMove_64f(from, to, n);
}
#endif
template<typename T, typename U>
inline void v_convert(U *const R__ dst,
@@ -118,6 +197,37 @@ inline void v_convert(double *const R__ dst,
v_copy(dst, src, count);
}
#if defined HAVE_IPP
// IPP specialisations of v_convert: widen float->double and narrow
// double->float using Intel's optimised conversion routines.
template<>
inline void v_convert(double *const R__ dst,
const float *const R__ src,
const int count)
{
ippsConvert_32f64f(src, dst, count);
}
template<>
inline void v_convert(float *const R__ dst,
const double *const R__ src,
const int count)
{
ippsConvert_64f32f(src, dst, count);
}
#elif defined HAVE_VDSP
// vDSP equivalents; stride 1 = contiguous data in both buffers.
template<>
inline void v_convert(double *const R__ dst,
const float *const R__ src,
const int count)
{
// NOTE(review): the C-style cast strips const -- presumably because
// this vecLib version declares a non-const input pointer; confirm
// against the SDK and prefer const_cast to make the intent explicit.
vDSP_vspdp((float *)src, 1, dst, 1, count);
}
template<>
inline void v_convert(float *const R__ dst,
const double *const R__ src,
const int count)
{
vDSP_vdpsp((double *)src, 1, dst, 1, count);
}
#endif
template<typename T, typename U>
inline void v_convert_channels(U *const R__ *const R__ dst,
@@ -150,6 +260,21 @@ inline void v_add(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisations of v_add: elementwise in-place dst[i] += src[i],
// dispatched to Intel IPP's optimised add routines.
template<>
inline void v_add(float *const R__ dst,
                  const float *const R__ src,
                  const int count)
{
    ippsAdd_32f_I(src, dst, count);
}
// Bug fix: the double-precision version lacked the "template<>"
// prefix, making it a plain overload rather than an explicit
// specialisation of the v_add template -- inconsistent with every
// sibling specialisation in this header and subject to different
// lookup/linkage behaviour than intended.
template<>
inline void v_add(double *const R__ dst,
                  const double *const R__ src,
                  const int count)
{
    ippsAdd_64f_I(src, dst, count);
}
#endif
template<typename T>
inline void v_add_channels(T *const R__ *const R__ dst,
@@ -194,6 +319,21 @@ inline void v_subtract(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisations of v_subtract: elementwise in-place
// dst[i] -= src[i] via Intel IPP.
template<>
inline void v_subtract(float *const R__ dst,
                       const float *const R__ src,
                       const int count)
{
    ippsSub_32f_I(src, dst, count);
}
// Bug fix: as with v_add, the double-precision version was missing
// the "template<>" prefix and so was a plain overload instead of the
// intended explicit specialisation of the v_subtract template.
template<>
inline void v_subtract(double *const R__ dst,
                       const double *const R__ src,
                       const int count)
{
    ippsSub_64f_I(src, dst, count);
}
#endif
template<typename T, typename G>
inline void v_scale(T *const R__ dst,
@@ -205,6 +345,22 @@ inline void v_scale(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisations of v_scale: multiply every element in place by
// a constant factor ("MulC" = multiply by constant).
template<>
inline void v_scale(float *const R__ data,
                    const float factor,
                    const int n)
{
    ippsMulC_32f_I(factor, data, n);
}
template<>
inline void v_scale(double *const R__ data,
                    const double factor,
                    const int n)
{
    ippsMulC_64f_I(factor, data, n);
}
#endif
template<typename T>
inline void v_multiply(T *const R__ dst,
@@ -216,6 +372,22 @@ inline void v_multiply(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisations of the two-argument (in-place) v_multiply:
// dst[i] *= src[i], per IPP's "_I" in-place convention where the
// second pointer argument is both source and destination.
template<>
inline void v_multiply(float *const R__ dst,
const float *const R__ src,
const int count)
{
ippsMul_32f_I(src, dst, count);
}
template<>
inline void v_multiply(double *const R__ dst,
const double *const R__ src,
const int count)
{
ippsMul_64f_I(src, dst, count);
}
#endif
template<typename T>
inline void v_multiply(T *const R__ dst,
@@ -238,7 +410,41 @@ inline void v_divide(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisations of v_divide: in-place elementwise division.
// Per IPP's "_I" convention the second argument is the in-place
// operand, i.e. dst[i] = dst[i] / src[i] -- confirm against the IPP
// signal-processing reference for Div_*_I.
template<>
inline void v_divide(float *const R__ dst,
const float *const R__ src,
const int count)
{
ippsDiv_32f_I(src, dst, count);
}
template<>
inline void v_divide(double *const R__ dst,
const double *const R__ src,
const int count)
{
ippsDiv_64f_I(src, dst, count);
}
#endif
#if defined HAVE_IPP
// IPP specialisations of the three-argument v_multiply:
// dst[i] = src1[i] * src2[i] (out-of-place).
// NOTE(review): this #if block is immediately preceded by another
// "#if defined HAVE_IPP" block ending just above; the two could be
// merged for tidiness.
template<>
inline void v_multiply(float *const R__ dst,
const float *const R__ src1,
const float *const R__ src2,
const int count)
{
ippsMul_32f(src1, src2, dst, count);
}
template<>
inline void v_multiply(double *const R__ dst,
const double *const R__ src1,
const double *const R__ src2,
const int count)
{
ippsMul_64f(src1, src2, dst, count);
}
#endif
template<typename T>
inline void v_multiply_and_add(T *const R__ dst,
@@ -251,6 +457,24 @@ inline void v_multiply_and_add(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisations of v_multiply_and_add (fused multiply-accumulate
// over buffers): dst[i] += src1[i] * src2[i], via IPP's AddProduct.
template<>
inline void v_multiply_and_add(float *const R__ dst,
const float *const R__ src1,
const float *const R__ src2,
const int count)
{
ippsAddProduct_32f(src1, src2, dst, count);
}
template<>
inline void v_multiply_and_add(double *const R__ dst,
const double *const R__ src1,
const double *const R__ src2,
const int count)
{
ippsAddProduct_64f(src1, src2, dst, count);
}
#endif
template<typename T>
inline T v_sum(const T *const R__ src,
@@ -272,6 +496,41 @@ inline void v_log(T *const R__ dst,
}
}
#if defined HAVE_IPP
// In-place natural logarithm specialisations using Intel IPP.
template<>
inline void v_log(float *const R__ dst,
const int count)
{
ippsLn_32f_I(dst, count);
}
template<>
inline void v_log(double *const R__ dst,
const int count)
{
ippsLn_64f_I(dst, count);
}
#elif defined HAVE_VDSP
// no in-place vForce functions for these -- can we use the
// out-of-place functions with equal input and output vectors? can we
// use an out-of-place one with temporary buffer and still be faster
// than doing it any other way?
template<>
inline void v_log(float *const R__ dst,
const int count)
{
// NOTE(review): a variable-length array is a compiler extension, not
// standard C++, and a large count risks stack overflow -- consider a
// reusable scratch buffer; confirm against the supported compilers.
float tmp[count];
vvlogf(tmp, dst, &count);
v_copy(dst, tmp, count);
}
template<>
inline void v_log(double *const R__ dst,
const int count)
{
double tmp[count];
vvlog(tmp, dst, &count);
v_copy(dst, tmp, count);
}
#endif
template<typename T>
inline void v_exp(T *const R__ dst,
@@ -282,6 +541,41 @@ inline void v_exp(T *const R__ dst,
}
}
#if defined HAVE_IPP
// In-place exponential (e^x) specialisations using Intel IPP.
template<>
inline void v_exp(float *const R__ dst,
const int count)
{
ippsExp_32f_I(dst, count);
}
template<>
inline void v_exp(double *const R__ dst,
const int count)
{
ippsExp_64f_I(dst, count);
}
#elif defined HAVE_VDSP
// no in-place vForce functions for these -- can we use the
// out-of-place functions with equal input and output vectors? can we
// use an out-of-place one with temporary buffer and still be faster
// than doing it any other way?
template<>
inline void v_exp(float *const R__ dst,
const int count)
{
// NOTE(review): variable-length array -- non-standard C++, stack
// overflow risk for large count (see v_log above).
float tmp[count];
vvexpf(tmp, dst, &count);
v_copy(dst, tmp, count);
}
template<>
inline void v_exp(double *const R__ dst,
const int count)
{
double tmp[count];
vvexp(tmp, dst, &count);
v_copy(dst, tmp, count);
}
#endif
template<typename T>
inline void v_sqrt(T *const R__ dst,
@@ -292,6 +586,41 @@ inline void v_sqrt(T *const R__ dst,
}
}
#if defined HAVE_IPP
// In-place square-root specialisations using Intel IPP.
template<>
inline void v_sqrt(float *const R__ dst,
const int count)
{
ippsSqrt_32f_I(dst, count);
}
template<>
inline void v_sqrt(double *const R__ dst,
const int count)
{
ippsSqrt_64f_I(dst, count);
}
#elif defined HAVE_VDSP
// no in-place vForce functions for these -- can we use the
// out-of-place functions with equal input and output vectors? can we
// use an out-of-place one with temporary buffer and still be faster
// than doing it any other way?
template<>
inline void v_sqrt(float *const R__ dst,
const int count)
{
// NOTE(review): variable-length array -- non-standard C++, stack
// overflow risk for large count (see v_log above).
float tmp[count];
vvsqrtf(tmp, dst, &count);
v_copy(dst, tmp, count);
}
template<>
inline void v_sqrt(double *const R__ dst,
const int count)
{
double tmp[count];
vvsqrt(tmp, dst, &count);
v_copy(dst, tmp, count);
}
#endif
template<typename T>
inline void v_square(T *const R__ dst,
@@ -302,6 +631,20 @@ inline void v_square(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisations of v_square: square each element in place
// (data[i] = data[i] * data[i]).
template<>
inline void v_square(float *const R__ data,
                     const int n)
{
    ippsSqr_32f_I(data, n);
}
template<>
inline void v_square(double *const R__ data,
                     const int n)
{
    ippsSqr_64f_I(data, n);
}
#endif
template<typename T>
inline void v_abs(T *const R__ dst,
@@ -312,6 +655,29 @@ inline void v_abs(T *const R__ dst,
}
}
#if defined HAVE_IPP
// In-place absolute-value specialisations using Intel IPP.
template<>
inline void v_abs(float *const R__ dst,
const int count)
{
ippsAbs_32f_I(dst, count);
}
template<>
inline void v_abs(double *const R__ dst,
const int count)
{
ippsAbs_64f_I(dst, count);
}
#elif defined HAVE_VDSP
template<>
inline void v_abs(float *const R__ dst,
const int count)
{
// NOTE(review): variable-length array -- non-standard C++, stack
// overflow risk for large count (see v_log above).
float tmp[count];
// NOTE(review): "vvfabf" appears to be the older vForce spelling;
// newer SDKs declare "vvfabsf" -- confirm which name the supported
// deployment targets provide before relying on this building.
vvfabf(tmp, dst, &count);
v_copy(dst, tmp, count);
}
// No double-precision vDSP specialisation here; doubles fall back to
// the generic template above.
#endif
template<typename T>
inline void v_interleave(T *const R__ dst,
@@ -341,6 +707,17 @@ inline void v_interleave(T *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisation of v_interleave: pack "channels" separate float
// arrays into a single interleaved array (frame-major).
template<>
inline void v_interleave(float *const R__ dst,
const float *const R__ *const R__ src,
const int channels,
const int count)
{
// Cast discards the R__ (restrict) qualifiers to match IPP's
// expected pointer-to-pointer type.
ippsInterleave_32f((const Ipp32f **)src, channels, count, dst);
}
// IPP does not (currently?) provide double-precision interleave
#endif
template<typename T>
inline void v_deinterleave(T *const R__ *const R__ dst,
@@ -370,6 +747,17 @@ inline void v_deinterleave(T *const R__ *const R__ dst,
}
}
#if defined HAVE_IPP
// IPP specialisation of v_deinterleave: split a single interleaved
// float array into "channels" separate per-channel arrays.
template<>
inline void v_deinterleave(float *const R__ *const R__ dst,
const float *const R__ src,
const int channels,
const int count)
{
// Casts discard the R__ (restrict) qualifiers to match IPP's
// expected pointer types.
ippsDeinterleave_32f((const Ipp32f *)src, channels, count, (Ipp32f **)dst);
}
// IPP does not (currently?) provide double-precision deinterleave
#endif
template<typename T>
inline void v_fftshift(T *const R__ ptr,