Reorganise into faster (R2) and finer (R3)
This commit is contained in:
81
src/ext/float_cast/float_cast.h
Normal file
81
src/ext/float_cast/float_cast.h
Normal file
@@ -0,0 +1,81 @@
|
||||
#ifndef ERIKD_FLOATCAST_H
|
||||
#define ERIKD_FLOATCAST_H
|
||||
|
||||
/*
|
||||
** Copyright (C) 2001 Erik de Castro Lopo <erikd AT mega-nerd DOT com>
|
||||
**
|
||||
** Permission to use, copy, modify, distribute, and sell this file for any
|
||||
** purpose is hereby granted without fee, provided that the above copyright
|
||||
** and this permission notice appear in all copies. No representations are
|
||||
** made about the suitability of this software for any purpose. It is
|
||||
** provided "as is" without express or implied warranty.
|
||||
*/
|
||||
|
||||
/* Version 1.1 */
|
||||
|
||||
|
||||
/*============================================================================
|
||||
** On Intel Pentium processors (especially PIII and probably P4), converting
|
||||
** from float to int is very slow. To meet the C specs, the code produced by
|
||||
** most C compilers targeting Pentium needs to change the FPU rounding mode
|
||||
** before the float to int conversion is performed.
|
||||
**
|
||||
** Changing the FPU rounding mode causes the FPU pipeline to be flushed. It
|
||||
** is this flushing of the pipeline which is so slow.
|
||||
**
|
||||
** Fortunately the ISO C99 specifications define the functions lrint, lrintf,
|
||||
** llrint and llrintf which fix this problem as a side effect.
|
||||
**
|
||||
** On Unix-like systems, the configure process should have detected the
|
||||
** presence of these functions. If they weren't found we have to replace them
|
||||
** here with a standard C cast.
|
||||
*/
|
||||
|
||||
/*
|
||||
** The C99 prototypes for lrint and lrintf are as follows:
|
||||
**
|
||||
** long int lrintf (float x) ;
|
||||
** long int lrint (double x) ;
|
||||
*/
|
||||
|
||||
#if (defined (_WIN64))
|
||||
|
||||
#include <math.h>
|
||||
__inline long int lrint(double flt) { return (long int)flt; }
|
||||
__inline long int lrintf(float flt) { return (long int)flt; }
|
||||
|
||||
#elif (defined (WIN32) || defined (_WIN32))
|
||||
|
||||
#include <math.h>
|
||||
|
||||
/* Win32 doesn't seem to have these functions.
|
||||
** Therefore implement inline versions of these functions here.
|
||||
*/
|
||||
|
||||
__inline long int
|
||||
lrint (double flt)
|
||||
{ int intgr;
|
||||
|
||||
_asm
|
||||
{ fld flt
|
||||
fistp intgr
|
||||
} ;
|
||||
|
||||
return intgr ;
|
||||
}
|
||||
|
||||
__inline long int
|
||||
lrintf (float flt)
|
||||
{ int intgr;
|
||||
|
||||
_asm
|
||||
{ fld flt
|
||||
fistp intgr
|
||||
} ;
|
||||
|
||||
return intgr ;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
112
src/ext/getopt/getopt.c
Normal file
112
src/ext/getopt/getopt.c
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 1987, 1993, 1994
|
||||
* The Regents of the University of California. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the University of
|
||||
* California, Berkeley and its contributors.
|
||||
* 4. Neither the name of the University nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
int opterr = 1, /* if error message should be printed */
|
||||
optind = 1, /* index into parent argv vector */
|
||||
optopt, /* character checked for validity */
|
||||
optreset; /* reset getopt */
|
||||
char *optarg; /* argument associated with option */
|
||||
|
||||
#define BADCH (int)'?'
|
||||
#define BADARG (int)':'
|
||||
#define EMSG ""
|
||||
|
||||
/*
|
||||
* getopt --
|
||||
* Parse argc/argv argument vector.
|
||||
*/
|
||||
int
|
||||
getopt(nargc, nargv, ostr)
|
||||
int nargc;
|
||||
char * const *nargv;
|
||||
const char *ostr;
|
||||
{
|
||||
static char *place = EMSG; /* option letter processing */
|
||||
char *oli; /* option letter list index */
|
||||
|
||||
if (optreset || !*place) { /* update scanning pointer */
|
||||
optreset = 0;
|
||||
if (optind >= nargc || *(place = nargv[optind]) != '-') {
|
||||
place = EMSG;
|
||||
return (-1);
|
||||
}
|
||||
if (place[1] && *++place == '-') { /* found "--" */
|
||||
++optind;
|
||||
place = EMSG;
|
||||
return (-1);
|
||||
}
|
||||
} /* option letter okay? */
|
||||
if ((optopt = (int)*place++) == (int)':' ||
|
||||
!(oli = strchr(ostr, optopt))) {
|
||||
/*
|
||||
* if the user didn't specify '-' as an option,
|
||||
* assume it means -1.
|
||||
*/
|
||||
if (optopt == (int)'-')
|
||||
return (-1);
|
||||
if (!*place)
|
||||
++optind;
|
||||
if (opterr && *ostr != ':' && optopt != BADCH)
|
||||
(void)fprintf(stderr, "%s: illegal option -- %c\n",
|
||||
"progname", optopt);
|
||||
return (BADCH);
|
||||
}
|
||||
if (*++oli != ':') { /* don't need argument */
|
||||
optarg = NULL;
|
||||
if (!*place)
|
||||
++optind;
|
||||
}
|
||||
else { /* need an argument */
|
||||
if (*place) /* no white space */
|
||||
optarg = place;
|
||||
else if (nargc <= ++optind) { /* no arg */
|
||||
place = EMSG;
|
||||
if (*ostr == ':')
|
||||
return (BADARG);
|
||||
if (opterr)
|
||||
(void)fprintf(stderr,
|
||||
"%s: option requires an argument -- %c\n",
|
||||
"progname", optopt);
|
||||
return (BADCH);
|
||||
}
|
||||
else /* white space */
|
||||
optarg = nargv[optind];
|
||||
place = EMSG;
|
||||
++optind;
|
||||
}
|
||||
return (optopt); /* dump back option letter */
|
||||
}
|
||||
110
src/ext/getopt/getopt.h
Normal file
110
src/ext/getopt/getopt.h
Normal file
@@ -0,0 +1,110 @@
|
||||
/* $NetBSD: getopt.h,v 1.4 2000/07/07 10:43:54 ad Exp $ */
|
||||
/* $FreeBSD: src/include/getopt.h,v 1.1 2002/09/29 04:14:30 eric Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Dieter Baron and Thomas Klausner.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the NetBSD
|
||||
* Foundation, Inc. and its contributors.
|
||||
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _GETOPT_H_
|
||||
#define _GETOPT_H_
|
||||
|
||||
#ifdef _WIN32
|
||||
/* from <sys/cdefs.h> */
|
||||
# ifdef __cplusplus
|
||||
# define __BEGIN_DECLS extern "C" {
|
||||
# define __END_DECLS }
|
||||
# else
|
||||
# define __BEGIN_DECLS
|
||||
# define __END_DECLS
|
||||
# endif
|
||||
# define __P(args) args
|
||||
#endif
|
||||
|
||||
/*#ifndef _WIN32
|
||||
#include <sys/cdefs.h>
|
||||
#include <unistd.h>
|
||||
#endif*/
|
||||
|
||||
#ifdef _WIN32
|
||||
# if !defined(GETOPT_API)
|
||||
# define GETOPT_API __declspec(dllimport)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Gnu like getopt_long() and BSD4.4 getsubopt()/optreset extensions
|
||||
*/
|
||||
#if !defined(_POSIX_SOURCE) && !defined(_XOPEN_SOURCE)
|
||||
#define no_argument 0
|
||||
#define required_argument 1
|
||||
#define optional_argument 2
|
||||
|
||||
struct option {
|
||||
/* name of long option */
|
||||
const char *name;
|
||||
/*
|
||||
* one of no_argument, required_argument, and optional_argument:
|
||||
* whether option takes an argument
|
||||
*/
|
||||
int has_arg;
|
||||
/* if not NULL, set *flag to val when option found */
|
||||
int *flag;
|
||||
/* if flag not NULL, value to set *flag to; else return value */
|
||||
int val;
|
||||
};
|
||||
|
||||
__BEGIN_DECLS
|
||||
GETOPT_API int getopt_long __P((int, char * const *, const char *,
|
||||
const struct option *, int *));
|
||||
__END_DECLS
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
/* These are global getopt variables */
|
||||
__BEGIN_DECLS
|
||||
|
||||
GETOPT_API extern int opterr, /* if error message should be printed */
|
||||
optind, /* index into parent argv vector */
|
||||
optopt, /* character checked for validity */
|
||||
optreset; /* reset getopt */
|
||||
GETOPT_API extern char* optarg; /* argument associated with option */
|
||||
|
||||
/* Original getopt */
|
||||
GETOPT_API int getopt __P((int, char * const *, const char *));
|
||||
|
||||
__END_DECLS
|
||||
#endif
|
||||
|
||||
#endif /* !_GETOPT_H_ */
|
||||
547
src/ext/getopt/getopt_long.c
Normal file
547
src/ext/getopt/getopt_long.c
Normal file
@@ -0,0 +1,547 @@
|
||||
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
|
||||
/* $FreeBSD: src/lib/libc/stdlib/getopt_long.c,v 1.2 2002/10/16 22:18:42 alfred Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2000 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Dieter Baron and Thomas Klausner.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. All advertising materials mentioning features or use of this software
|
||||
* must display the following acknowledgement:
|
||||
* This product includes software developed by the NetBSD
|
||||
* Foundation, Inc. and its contributors.
|
||||
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
/* Windows needs warnx(). We change the definition though:
|
||||
* 1. (another) global is defined, opterrmsg, which holds the error message
|
||||
* 2. errors are always printed out on stderr w/o the program name
|
||||
* Note that opterrmsg always gets set no matter what opterr is set to. The
|
||||
* error message will not be printed if opterr is 0 as usual.
|
||||
*/
|
||||
|
||||
#include "getopt.h"
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
GETOPT_API extern char opterrmsg[128];
|
||||
char opterrmsg[128]; /* last error message is stored here */
|
||||
|
||||
static void warnx(int print_error, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
va_start(ap, fmt);
|
||||
if (fmt != NULL)
|
||||
_vsnprintf(opterrmsg, 128, fmt, ap);
|
||||
else
|
||||
opterrmsg[0]='\0';
|
||||
va_end(ap);
|
||||
if (print_error) {
|
||||
fprintf(stderr, opterrmsg);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
#endif /*_WIN32*/
|
||||
|
||||
/* not part of the original file */
|
||||
#ifndef _DIAGASSERT
|
||||
#define _DIAGASSERT(X)
|
||||
#endif
|
||||
|
||||
#if HAVE_CONFIG_H && !HAVE_GETOPT_LONG && !HAVE_DECL_OPTIND
|
||||
#define REPLACE_GETOPT
|
||||
#endif
|
||||
|
||||
#ifdef REPLACE_GETOPT
|
||||
#ifdef __weak_alias
|
||||
__weak_alias(getopt,_getopt)
|
||||
#endif
|
||||
int opterr = 1; /* if error message should be printed */
|
||||
int optind = 1; /* index into parent argv vector */
|
||||
int optopt = '?'; /* character checked for validity */
|
||||
int optreset; /* reset getopt */
|
||||
char *optarg; /* argument associated with option */
|
||||
#elif HAVE_CONFIG_H && !HAVE_DECL_OPTRESET
|
||||
static int optreset;
|
||||
#endif
|
||||
|
||||
#ifdef __weak_alias
|
||||
__weak_alias(getopt_long,_getopt_long)
|
||||
#endif
|
||||
|
||||
#if !HAVE_GETOPT_LONG
|
||||
#define IGNORE_FIRST (*options == '-' || *options == '+')
|
||||
#define PRINT_ERROR ((opterr) && ((*options != ':') \
|
||||
|| (IGNORE_FIRST && options[1] != ':')))
|
||||
#define IS_POSIXLY_CORRECT (getenv("POSIXLY_CORRECT") != NULL)
|
||||
#define PERMUTE (!IS_POSIXLY_CORRECT && !IGNORE_FIRST)
|
||||
/* XXX: GNU ignores PC if *options == '-' */
|
||||
#define IN_ORDER (!IS_POSIXLY_CORRECT && *options == '-')
|
||||
|
||||
/* return values */
|
||||
#define BADCH (int)'?'
|
||||
#define BADARG ((IGNORE_FIRST && options[1] == ':') \
|
||||
|| (*options == ':') ? (int)':' : (int)'?')
|
||||
#define INORDER (int)1
|
||||
|
||||
#define EMSG ""
|
||||
|
||||
static int getopt_internal(int, char * const *, const char *);
|
||||
static int gcd(int, int);
|
||||
static void permute_args(int, int, int, char * const *);
|
||||
|
||||
static char *place = EMSG; /* option letter processing */
|
||||
|
||||
/* XXX: set optreset to 1 rather than these two */
|
||||
static int nonopt_start = -1; /* first non option argument (for permute) */
|
||||
static int nonopt_end = -1; /* first option after non options (for permute) */
|
||||
|
||||
/* Error messages */
|
||||
static const char recargchar[] = "option requires an argument -- %c";
|
||||
static const char recargstring[] = "option requires an argument -- %s";
|
||||
static const char ambig[] = "ambiguous option -- %.*s";
|
||||
static const char noarg[] = "option doesn't take an argument -- %.*s";
|
||||
static const char illoptchar[] = "unknown option -- %c";
|
||||
static const char illoptstring[] = "unknown option -- %s";
|
||||
|
||||
|
||||
/*
|
||||
* Compute the greatest common divisor of a and b.
|
||||
*/
|
||||
static int
|
||||
gcd(a, b)
|
||||
int a;
|
||||
int b;
|
||||
{
|
||||
int c;
|
||||
|
||||
c = a % b;
|
||||
while (c != 0) {
|
||||
a = b;
|
||||
b = c;
|
||||
c = a % b;
|
||||
}
|
||||
|
||||
return b;
|
||||
}
|
||||
|
||||
/*
|
||||
* Exchange the block from nonopt_start to nonopt_end with the block
|
||||
* from nonopt_end to opt_end (keeping the same order of arguments
|
||||
* in each block).
|
||||
*/
|
||||
static void
|
||||
permute_args(panonopt_start, panonopt_end, opt_end, nargv)
|
||||
int panonopt_start;
|
||||
int panonopt_end;
|
||||
int opt_end;
|
||||
char * const *nargv;
|
||||
{
|
||||
int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
|
||||
char *swap;
|
||||
|
||||
_DIAGASSERT(nargv != NULL);
|
||||
|
||||
/*
|
||||
* compute lengths of blocks and number and size of cycles
|
||||
*/
|
||||
nnonopts = panonopt_end - panonopt_start;
|
||||
nopts = opt_end - panonopt_end;
|
||||
ncycle = gcd(nnonopts, nopts);
|
||||
cyclelen = (opt_end - panonopt_start) / ncycle;
|
||||
|
||||
for (i = 0; i < ncycle; i++) {
|
||||
cstart = panonopt_end+i;
|
||||
pos = cstart;
|
||||
for (j = 0; j < cyclelen; j++) {
|
||||
if (pos >= panonopt_end)
|
||||
pos -= nnonopts;
|
||||
else
|
||||
pos += nopts;
|
||||
swap = nargv[pos];
|
||||
/* LINTED const cast */
|
||||
((char **) nargv)[pos] = nargv[cstart];
|
||||
/* LINTED const cast */
|
||||
((char **)nargv)[cstart] = swap;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* getopt_internal --
|
||||
* Parse argc/argv argument vector. Called by user level routines.
|
||||
* Returns -2 if -- is found (can be long option or end of options marker).
|
||||
*/
|
||||
static int
|
||||
getopt_internal(nargc, nargv, options)
|
||||
int nargc;
|
||||
char * const *nargv;
|
||||
const char *options;
|
||||
{
|
||||
char *oli; /* option letter list index */
|
||||
int optchar;
|
||||
|
||||
_DIAGASSERT(nargv != NULL);
|
||||
_DIAGASSERT(options != NULL);
|
||||
|
||||
optarg = NULL;
|
||||
|
||||
/*
|
||||
* XXX Some programs (like rsyncd) expect to be able to
|
||||
* XXX re-initialize optind to 0 and have getopt_long(3)
|
||||
* XXX properly function again. Work around this braindamage.
|
||||
*/
|
||||
if (optind == 0)
|
||||
optind = 1;
|
||||
|
||||
if (optreset)
|
||||
nonopt_start = nonopt_end = -1;
|
||||
start:
|
||||
if (optreset || !*place) { /* update scanning pointer */
|
||||
optreset = 0;
|
||||
if (optind >= nargc) { /* end of argument vector */
|
||||
place = EMSG;
|
||||
if (nonopt_end != -1) {
|
||||
/* do permutation, if we have to */
|
||||
permute_args(nonopt_start, nonopt_end,
|
||||
optind, nargv);
|
||||
optind -= nonopt_end - nonopt_start;
|
||||
}
|
||||
else if (nonopt_start != -1) {
|
||||
/*
|
||||
* If we skipped non-options, set optind
|
||||
* to the first of them.
|
||||
*/
|
||||
optind = nonopt_start;
|
||||
}
|
||||
nonopt_start = nonopt_end = -1;
|
||||
return -1;
|
||||
}
|
||||
if ((*(place = nargv[optind]) != '-')
|
||||
|| (place[1] == '\0')) { /* found non-option */
|
||||
place = EMSG;
|
||||
if (IN_ORDER) {
|
||||
/*
|
||||
* GNU extension:
|
||||
* return non-option as argument to option 1
|
||||
*/
|
||||
optarg = nargv[optind++];
|
||||
return INORDER;
|
||||
}
|
||||
if (!PERMUTE) {
|
||||
/*
|
||||
* if no permutation wanted, stop parsing
|
||||
* at first non-option
|
||||
*/
|
||||
return -1;
|
||||
}
|
||||
/* do permutation */
|
||||
if (nonopt_start == -1)
|
||||
nonopt_start = optind;
|
||||
else if (nonopt_end != -1) {
|
||||
permute_args(nonopt_start, nonopt_end,
|
||||
optind, nargv);
|
||||
nonopt_start = optind -
|
||||
(nonopt_end - nonopt_start);
|
||||
nonopt_end = -1;
|
||||
}
|
||||
optind++;
|
||||
/* process next argument */
|
||||
goto start;
|
||||
}
|
||||
if (nonopt_start != -1 && nonopt_end == -1)
|
||||
nonopt_end = optind;
|
||||
if (place[1] && *++place == '-') { /* found "--" */
|
||||
place++;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
if ((optchar = (int)*place++) == (int)':' ||
|
||||
(oli = strchr(options + (IGNORE_FIRST ? 1 : 0), optchar)) == NULL) {
|
||||
/* option letter unknown or ':' */
|
||||
if (!*place)
|
||||
++optind;
|
||||
#ifndef _WIN32
|
||||
if (PRINT_ERROR)
|
||||
warnx(illoptchar, optchar);
|
||||
#else
|
||||
warnx(PRINT_ERROR, illoptchar, optchar);
|
||||
#endif
|
||||
optopt = optchar;
|
||||
return BADCH;
|
||||
}
|
||||
if (optchar == 'W' && oli[1] == ';') { /* -W long-option */
|
||||
/* XXX: what if no long options provided (called by getopt)? */
|
||||
if (*place)
|
||||
return -2;
|
||||
|
||||
if (++optind >= nargc) { /* no arg */
|
||||
place = EMSG;
|
||||
#ifndef _WIN32
|
||||
if (PRINT_ERROR)
|
||||
warnx(recargchar, optchar);
|
||||
#else
|
||||
warnx(PRINT_ERROR, recargchar, optchar);
|
||||
#endif
|
||||
optopt = optchar;
|
||||
return BADARG;
|
||||
} else /* white space */
|
||||
place = nargv[optind];
|
||||
/*
|
||||
* Handle -W arg the same as --arg (which causes getopt to
|
||||
* stop parsing).
|
||||
*/
|
||||
return -2;
|
||||
}
|
||||
if (*++oli != ':') { /* doesn't take argument */
|
||||
if (!*place)
|
||||
++optind;
|
||||
} else { /* takes (optional) argument */
|
||||
optarg = NULL;
|
||||
if (*place) /* no white space */
|
||||
optarg = place;
|
||||
/* XXX: disable test for :: if PC? (GNU doesn't) */
|
||||
else if (oli[1] != ':') { /* arg not optional */
|
||||
if (++optind >= nargc) { /* no arg */
|
||||
place = EMSG;
|
||||
#ifndef _WIN32
|
||||
if (PRINT_ERROR)
|
||||
warnx(recargchar, optchar);
|
||||
#else
|
||||
warnx(PRINT_ERROR, recargchar, optchar);
|
||||
#endif
|
||||
optopt = optchar;
|
||||
return BADARG;
|
||||
} else
|
||||
optarg = nargv[optind];
|
||||
}
|
||||
place = EMSG;
|
||||
++optind;
|
||||
}
|
||||
/* dump back option letter */
|
||||
return optchar;
|
||||
}
|
||||
|
||||
#ifdef REPLACE_GETOPT
|
||||
/*
|
||||
* getopt --
|
||||
* Parse argc/argv argument vector.
|
||||
*
|
||||
* [eventually this will replace the real getopt]
|
||||
*/
|
||||
int
|
||||
getopt(nargc, nargv, options)
|
||||
int nargc;
|
||||
char * const *nargv;
|
||||
const char *options;
|
||||
{
|
||||
int retval;
|
||||
|
||||
_DIAGASSERT(nargv != NULL);
|
||||
_DIAGASSERT(options != NULL);
|
||||
|
||||
if ((retval = getopt_internal(nargc, nargv, options)) == -2) {
|
||||
++optind;
|
||||
/*
|
||||
* We found an option (--), so if we skipped non-options,
|
||||
* we have to permute.
|
||||
*/
|
||||
if (nonopt_end != -1) {
|
||||
permute_args(nonopt_start, nonopt_end, optind,
|
||||
nargv);
|
||||
optind -= nonopt_end - nonopt_start;
|
||||
}
|
||||
nonopt_start = nonopt_end = -1;
|
||||
retval = -1;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* getopt_long --
|
||||
* Parse argc/argv argument vector.
|
||||
*/
|
||||
int
|
||||
getopt_long(nargc, nargv, options, long_options, idx)
|
||||
int nargc;
|
||||
char * const *nargv;
|
||||
const char *options;
|
||||
const struct option *long_options;
|
||||
int *idx;
|
||||
{
|
||||
int retval;
|
||||
|
||||
_DIAGASSERT(nargv != NULL);
|
||||
_DIAGASSERT(options != NULL);
|
||||
_DIAGASSERT(long_options != NULL);
|
||||
/* idx may be NULL */
|
||||
|
||||
if ((retval = getopt_internal(nargc, nargv, options)) == -2) {
|
||||
char *current_argv, *has_equal;
|
||||
size_t current_argv_len;
|
||||
int i, match;
|
||||
|
||||
current_argv = place;
|
||||
match = -1;
|
||||
|
||||
optind++;
|
||||
place = EMSG;
|
||||
|
||||
if (*current_argv == '\0') { /* found "--" */
|
||||
/*
|
||||
* We found an option (--), so if we skipped
|
||||
* non-options, we have to permute.
|
||||
*/
|
||||
if (nonopt_end != -1) {
|
||||
permute_args(nonopt_start, nonopt_end,
|
||||
optind, nargv);
|
||||
optind -= nonopt_end - nonopt_start;
|
||||
}
|
||||
nonopt_start = nonopt_end = -1;
|
||||
return -1;
|
||||
}
|
||||
if ((has_equal = strchr(current_argv, '=')) != NULL) {
|
||||
/* argument found (--option=arg) */
|
||||
current_argv_len = has_equal - current_argv;
|
||||
has_equal++;
|
||||
} else
|
||||
current_argv_len = strlen(current_argv);
|
||||
|
||||
for (i = 0; long_options[i].name; i++) {
|
||||
/* find matching long option */
|
||||
if (strncmp(current_argv, long_options[i].name,
|
||||
current_argv_len))
|
||||
continue;
|
||||
|
||||
if (strlen(long_options[i].name) ==
|
||||
(unsigned)current_argv_len) {
|
||||
/* exact match */
|
||||
match = i;
|
||||
break;
|
||||
}
|
||||
if (match == -1) /* partial match */
|
||||
match = i;
|
||||
else {
|
||||
/* ambiguous abbreviation */
|
||||
#ifndef _WIN32
|
||||
if (PRINT_ERROR)
|
||||
warnx(ambig, (int)current_argv_len,
|
||||
current_argv);
|
||||
#else
|
||||
warnx(PRINT_ERROR, ambig, (int)current_argv_len,
|
||||
current_argv);
|
||||
#endif
|
||||
optopt = 0;
|
||||
return BADCH;
|
||||
}
|
||||
}
|
||||
if (match != -1) { /* option found */
|
||||
if (long_options[match].has_arg == no_argument
|
||||
&& has_equal) {
|
||||
#ifndef _WIN32
|
||||
if (PRINT_ERROR)
|
||||
warnx(noarg, (int)current_argv_len,
|
||||
current_argv);
|
||||
#else
|
||||
warnx(PRINT_ERROR, noarg, (int)current_argv_len,
|
||||
current_argv);
|
||||
#endif
|
||||
/*
|
||||
* XXX: GNU sets optopt to val regardless of
|
||||
* flag
|
||||
*/
|
||||
if (long_options[match].flag == NULL)
|
||||
optopt = long_options[match].val;
|
||||
else
|
||||
optopt = 0;
|
||||
return BADARG;
|
||||
}
|
||||
if (long_options[match].has_arg == required_argument ||
|
||||
long_options[match].has_arg == optional_argument) {
|
||||
if (has_equal)
|
||||
optarg = has_equal;
|
||||
else if (long_options[match].has_arg ==
|
||||
required_argument) {
|
||||
/*
|
||||
* optional argument doesn't use
|
||||
* next nargv
|
||||
*/
|
||||
optarg = nargv[optind++];
|
||||
}
|
||||
}
|
||||
if ((long_options[match].has_arg == required_argument)
|
||||
&& (optarg == NULL)) {
|
||||
/*
|
||||
* Missing argument; leading ':'
|
||||
* indicates no error should be generated
|
||||
*/
|
||||
#ifndef _WIN32
|
||||
if (PRINT_ERROR)
|
||||
warnx(recargstring, current_argv);
|
||||
#else
|
||||
warnx(PRINT_ERROR, recargstring, current_argv);
|
||||
#endif
|
||||
/*
|
||||
* XXX: GNU sets optopt to val regardless
|
||||
* of flag
|
||||
*/
|
||||
if (long_options[match].flag == NULL)
|
||||
optopt = long_options[match].val;
|
||||
else
|
||||
optopt = 0;
|
||||
--optind;
|
||||
return BADARG;
|
||||
}
|
||||
} else { /* unknown option */
|
||||
#ifndef _WIN32
|
||||
if (PRINT_ERROR)
|
||||
warnx(illoptstring, current_argv);
|
||||
#else
|
||||
warnx(PRINT_ERROR, illoptstring, current_argv);
|
||||
#endif
|
||||
optopt = 0;
|
||||
return BADCH;
|
||||
}
|
||||
if (long_options[match].flag) {
|
||||
*long_options[match].flag = long_options[match].val;
|
||||
retval = 0;
|
||||
} else
|
||||
retval = long_options[match].val;
|
||||
if (idx)
|
||||
*idx = match;
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
#endif /* !GETOPT_LONG */
|
||||
11
src/ext/kissfft/COPYING
Normal file
11
src/ext/kissfft/COPYING
Normal file
@@ -0,0 +1,11 @@
|
||||
Copyright (c) 2003-2010 Mark Borgerding . All rights reserved.
|
||||
|
||||
KISS FFT is provided under:
|
||||
|
||||
SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
Being under the terms of the BSD 3-clause "New" or "Revised" License,
|
||||
according with:
|
||||
|
||||
LICENSES/BSD-3-Clause
|
||||
|
||||
167
src/ext/kissfft/_kiss_fft_guts.h
Normal file
167
src/ext/kissfft/_kiss_fft_guts.h
Normal file
@@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
/* kiss_fft.h
|
||||
defines kiss_fft_scalar as either short or a float type
|
||||
and defines
|
||||
typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
|
||||
|
||||
#ifndef _kiss_fft_guts_h
|
||||
#define _kiss_fft_guts_h
|
||||
|
||||
#include "kiss_fft.h"
|
||||
#include "kiss_fft_log.h"
|
||||
#include <limits.h>
|
||||
|
||||
#define MAXFACTORS 32
|
||||
/* e.g. an fft of length 128 has 4 factors
|
||||
as far as kissfft is concerned
|
||||
4*4*4*2
|
||||
*/
|
||||
|
||||
struct kiss_fft_state{
|
||||
int nfft;
|
||||
int inverse;
|
||||
int factors[2*MAXFACTORS];
|
||||
kiss_fft_cpx twiddles[1];
|
||||
};
|
||||
|
||||
/*
|
||||
Explanation of macros dealing with complex math:
|
||||
|
||||
C_MUL(m,a,b) : m = a*b
|
||||
C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
|
||||
C_SUB( res, a,b) : res = a - b
|
||||
C_SUBFROM( res , a) : res -= a
|
||||
C_ADDTO( res , a) : res += a
|
||||
* */
|
||||
#ifdef FIXED_POINT
|
||||
#include <stdint.h>
|
||||
#if (FIXED_POINT==32)
|
||||
# define FRACBITS 31
|
||||
# define SAMPPROD int64_t
|
||||
#define SAMP_MAX INT32_MAX
|
||||
#define SAMP_MIN INT32_MIN
|
||||
#else
|
||||
# define FRACBITS 15
|
||||
# define SAMPPROD int32_t
|
||||
#define SAMP_MAX INT16_MAX
|
||||
#define SAMP_MIN INT16_MIN
|
||||
#endif
|
||||
|
||||
#if defined(CHECK_OVERFLOW)
|
||||
# define CHECK_OVERFLOW_OP(a,op,b) \
|
||||
if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
|
||||
KISS_FFT_WARNING("overflow (%d " #op" %d) = %ld", (a),(b),(SAMPPROD)(a) op (SAMPPROD)(b)); }
|
||||
#endif
|
||||
|
||||
|
||||
# define smul(a,b) ( (SAMPPROD)(a)*(b) )
|
||||
# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )
|
||||
|
||||
# define S_MUL(a,b) sround( smul(a,b) )
|
||||
|
||||
# define C_MUL(m,a,b) \
|
||||
do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
|
||||
(m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)
|
||||
|
||||
# define DIVSCALAR(x,k) \
|
||||
(x) = sround( smul( x, SAMP_MAX/k ) )
|
||||
|
||||
# define C_FIXDIV(c,div) \
|
||||
do { DIVSCALAR( (c).r , div); \
|
||||
DIVSCALAR( (c).i , div); }while (0)
|
||||
|
||||
# define C_MULBYSCALAR( c, s ) \
|
||||
do{ (c).r = sround( smul( (c).r , s ) ) ;\
|
||||
(c).i = sround( smul( (c).i , s ) ) ; }while(0)
|
||||
|
||||
#else /* not FIXED_POINT*/
|
||||
|
||||
# define S_MUL(a,b) ( (a)*(b) )
|
||||
#define C_MUL(m,a,b) \
|
||||
do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
|
||||
(m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
|
||||
# define C_FIXDIV(c,div) /* NOOP */
|
||||
# define C_MULBYSCALAR( c, s ) \
|
||||
do{ (c).r *= (s);\
|
||||
(c).i *= (s); }while(0)
|
||||
#endif
|
||||
|
||||
#ifndef CHECK_OVERFLOW_OP
|
||||
# define CHECK_OVERFLOW_OP(a,op,b) /* noop */
|
||||
#endif
|
||||
|
||||
#define C_ADD( res, a,b)\
|
||||
do { \
|
||||
CHECK_OVERFLOW_OP((a).r,+,(b).r)\
|
||||
CHECK_OVERFLOW_OP((a).i,+,(b).i)\
|
||||
(res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \
|
||||
}while(0)
|
||||
#define C_SUB( res, a,b)\
|
||||
do { \
|
||||
CHECK_OVERFLOW_OP((a).r,-,(b).r)\
|
||||
CHECK_OVERFLOW_OP((a).i,-,(b).i)\
|
||||
(res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \
|
||||
}while(0)
|
||||
#define C_ADDTO( res , a)\
|
||||
do { \
|
||||
CHECK_OVERFLOW_OP((res).r,+,(a).r)\
|
||||
CHECK_OVERFLOW_OP((res).i,+,(a).i)\
|
||||
(res).r += (a).r; (res).i += (a).i;\
|
||||
}while(0)
|
||||
|
||||
#define C_SUBFROM( res , a)\
|
||||
do {\
|
||||
CHECK_OVERFLOW_OP((res).r,-,(a).r)\
|
||||
CHECK_OVERFLOW_OP((res).i,-,(a).i)\
|
||||
(res).r -= (a).r; (res).i -= (a).i; \
|
||||
}while(0)
|
||||
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
# define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase))
|
||||
# define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase))
|
||||
# define HALF_OF(x) ((x)>>1)
|
||||
#elif defined(USE_SIMD)
|
||||
# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
|
||||
# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
|
||||
# define HALF_OF(x) ((x)*_mm_set1_ps(.5))
|
||||
#else
|
||||
# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
|
||||
# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
|
||||
# define HALF_OF(x) ((x)*((kiss_fft_scalar).5))
|
||||
#endif
|
||||
|
||||
#define kf_cexp(x,phase) \
|
||||
do{ \
|
||||
(x)->r = KISS_FFT_COS(phase);\
|
||||
(x)->i = KISS_FFT_SIN(phase);\
|
||||
}while(0)
|
||||
|
||||
|
||||
/* a debugging function */
|
||||
#define pcpx(c)\
|
||||
KISS_FFT_DEBUG("%g + %gi\n",(double)((c)->r),(double)((c)->i))
|
||||
|
||||
|
||||
#ifdef KISS_FFT_USE_ALLOCA
|
||||
// define this to allow use of alloca instead of malloc for temporary buffers
|
||||
// Temporary buffers are used in two case:
|
||||
// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5
|
||||
// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform.
|
||||
#include <alloca.h>
|
||||
#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
|
||||
#define KISS_FFT_TMP_FREE(ptr)
|
||||
#else
|
||||
#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
|
||||
#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
|
||||
#endif
|
||||
|
||||
#endif /* _kiss_fft_guts_h */
|
||||
|
||||
420
src/ext/kissfft/kiss_fft.c
Normal file
420
src/ext/kissfft/kiss_fft.c
Normal file
@@ -0,0 +1,420 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
|
||||
#include "_kiss_fft_guts.h"
|
||||
/* The guts header contains all the multiplication and addition macros that are defined for
|
||||
fixed or floating point complex numbers. It also delares the kf_ internal functions.
|
||||
*/
|
||||
|
||||
static void kf_bfly2(
|
||||
kiss_fft_cpx * Fout,
|
||||
const size_t fstride,
|
||||
const kiss_fft_cfg st,
|
||||
int m
|
||||
)
|
||||
{
|
||||
kiss_fft_cpx * Fout2;
|
||||
kiss_fft_cpx * tw1 = st->twiddles;
|
||||
kiss_fft_cpx t;
|
||||
Fout2 = Fout + m;
|
||||
do{
|
||||
C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
|
||||
|
||||
C_MUL (t, *Fout2 , *tw1);
|
||||
tw1 += fstride;
|
||||
C_SUB( *Fout2 , *Fout , t );
|
||||
C_ADDTO( *Fout , t );
|
||||
++Fout2;
|
||||
++Fout;
|
||||
}while (--m);
|
||||
}
|
||||
|
||||
static void kf_bfly4(
|
||||
kiss_fft_cpx * Fout,
|
||||
const size_t fstride,
|
||||
const kiss_fft_cfg st,
|
||||
const size_t m
|
||||
)
|
||||
{
|
||||
kiss_fft_cpx *tw1,*tw2,*tw3;
|
||||
kiss_fft_cpx scratch[6];
|
||||
size_t k=m;
|
||||
const size_t m2=2*m;
|
||||
const size_t m3=3*m;
|
||||
|
||||
|
||||
tw3 = tw2 = tw1 = st->twiddles;
|
||||
|
||||
do {
|
||||
C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
|
||||
|
||||
C_MUL(scratch[0],Fout[m] , *tw1 );
|
||||
C_MUL(scratch[1],Fout[m2] , *tw2 );
|
||||
C_MUL(scratch[2],Fout[m3] , *tw3 );
|
||||
|
||||
C_SUB( scratch[5] , *Fout, scratch[1] );
|
||||
C_ADDTO(*Fout, scratch[1]);
|
||||
C_ADD( scratch[3] , scratch[0] , scratch[2] );
|
||||
C_SUB( scratch[4] , scratch[0] , scratch[2] );
|
||||
C_SUB( Fout[m2], *Fout, scratch[3] );
|
||||
tw1 += fstride;
|
||||
tw2 += fstride*2;
|
||||
tw3 += fstride*3;
|
||||
C_ADDTO( *Fout , scratch[3] );
|
||||
|
||||
if(st->inverse) {
|
||||
Fout[m].r = scratch[5].r - scratch[4].i;
|
||||
Fout[m].i = scratch[5].i + scratch[4].r;
|
||||
Fout[m3].r = scratch[5].r + scratch[4].i;
|
||||
Fout[m3].i = scratch[5].i - scratch[4].r;
|
||||
}else{
|
||||
Fout[m].r = scratch[5].r + scratch[4].i;
|
||||
Fout[m].i = scratch[5].i - scratch[4].r;
|
||||
Fout[m3].r = scratch[5].r - scratch[4].i;
|
||||
Fout[m3].i = scratch[5].i + scratch[4].r;
|
||||
}
|
||||
++Fout;
|
||||
}while(--k);
|
||||
}
|
||||
|
||||
static void kf_bfly3(
|
||||
kiss_fft_cpx * Fout,
|
||||
const size_t fstride,
|
||||
const kiss_fft_cfg st,
|
||||
size_t m
|
||||
)
|
||||
{
|
||||
size_t k=m;
|
||||
const size_t m2 = 2*m;
|
||||
kiss_fft_cpx *tw1,*tw2;
|
||||
kiss_fft_cpx scratch[5];
|
||||
kiss_fft_cpx epi3;
|
||||
epi3 = st->twiddles[fstride*m];
|
||||
|
||||
tw1=tw2=st->twiddles;
|
||||
|
||||
do{
|
||||
C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
|
||||
|
||||
C_MUL(scratch[1],Fout[m] , *tw1);
|
||||
C_MUL(scratch[2],Fout[m2] , *tw2);
|
||||
|
||||
C_ADD(scratch[3],scratch[1],scratch[2]);
|
||||
C_SUB(scratch[0],scratch[1],scratch[2]);
|
||||
tw1 += fstride;
|
||||
tw2 += fstride*2;
|
||||
|
||||
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
|
||||
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
|
||||
|
||||
C_MULBYSCALAR( scratch[0] , epi3.i );
|
||||
|
||||
C_ADDTO(*Fout,scratch[3]);
|
||||
|
||||
Fout[m2].r = Fout[m].r + scratch[0].i;
|
||||
Fout[m2].i = Fout[m].i - scratch[0].r;
|
||||
|
||||
Fout[m].r -= scratch[0].i;
|
||||
Fout[m].i += scratch[0].r;
|
||||
|
||||
++Fout;
|
||||
}while(--k);
|
||||
}
|
||||
|
||||
static void kf_bfly5(
|
||||
kiss_fft_cpx * Fout,
|
||||
const size_t fstride,
|
||||
const kiss_fft_cfg st,
|
||||
int m
|
||||
)
|
||||
{
|
||||
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
|
||||
int u;
|
||||
kiss_fft_cpx scratch[13];
|
||||
kiss_fft_cpx * twiddles = st->twiddles;
|
||||
kiss_fft_cpx *tw;
|
||||
kiss_fft_cpx ya,yb;
|
||||
ya = twiddles[fstride*m];
|
||||
yb = twiddles[fstride*2*m];
|
||||
|
||||
Fout0=Fout;
|
||||
Fout1=Fout0+m;
|
||||
Fout2=Fout0+2*m;
|
||||
Fout3=Fout0+3*m;
|
||||
Fout4=Fout0+4*m;
|
||||
|
||||
tw=st->twiddles;
|
||||
for ( u=0; u<m; ++u ) {
|
||||
C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
|
||||
scratch[0] = *Fout0;
|
||||
|
||||
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
|
||||
C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
|
||||
C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
|
||||
C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
|
||||
|
||||
C_ADD( scratch[7],scratch[1],scratch[4]);
|
||||
C_SUB( scratch[10],scratch[1],scratch[4]);
|
||||
C_ADD( scratch[8],scratch[2],scratch[3]);
|
||||
C_SUB( scratch[9],scratch[2],scratch[3]);
|
||||
|
||||
Fout0->r += scratch[7].r + scratch[8].r;
|
||||
Fout0->i += scratch[7].i + scratch[8].i;
|
||||
|
||||
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
|
||||
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
|
||||
|
||||
scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
|
||||
scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
|
||||
|
||||
C_SUB(*Fout1,scratch[5],scratch[6]);
|
||||
C_ADD(*Fout4,scratch[5],scratch[6]);
|
||||
|
||||
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
|
||||
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
|
||||
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
|
||||
scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
|
||||
|
||||
C_ADD(*Fout2,scratch[11],scratch[12]);
|
||||
C_SUB(*Fout3,scratch[11],scratch[12]);
|
||||
|
||||
++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
|
||||
}
|
||||
}
|
||||
|
||||
/* perform the butterfly for one stage of a mixed radix FFT */
|
||||
static void kf_bfly_generic(
|
||||
kiss_fft_cpx * Fout,
|
||||
const size_t fstride,
|
||||
const kiss_fft_cfg st,
|
||||
int m,
|
||||
int p
|
||||
)
|
||||
{
|
||||
int u,k,q1,q;
|
||||
kiss_fft_cpx * twiddles = st->twiddles;
|
||||
kiss_fft_cpx t;
|
||||
int Norig = st->nfft;
|
||||
|
||||
kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
|
||||
if (scratch == NULL){
|
||||
KISS_FFT_ERROR("Memory allocation failed.");
|
||||
return;
|
||||
}
|
||||
|
||||
for ( u=0; u<m; ++u ) {
|
||||
k=u;
|
||||
for ( q1=0 ; q1<p ; ++q1 ) {
|
||||
scratch[q1] = Fout[ k ];
|
||||
C_FIXDIV(scratch[q1],p);
|
||||
k += m;
|
||||
}
|
||||
|
||||
k=u;
|
||||
for ( q1=0 ; q1<p ; ++q1 ) {
|
||||
int twidx=0;
|
||||
Fout[ k ] = scratch[0];
|
||||
for (q=1;q<p;++q ) {
|
||||
twidx += fstride * k;
|
||||
if (twidx>=Norig) twidx-=Norig;
|
||||
C_MUL(t,scratch[q] , twiddles[twidx] );
|
||||
C_ADDTO( Fout[ k ] ,t);
|
||||
}
|
||||
k += m;
|
||||
}
|
||||
}
|
||||
KISS_FFT_TMP_FREE(scratch);
|
||||
}
|
||||
|
||||
static
|
||||
void kf_work(
|
||||
kiss_fft_cpx * Fout,
|
||||
const kiss_fft_cpx * f,
|
||||
const size_t fstride,
|
||||
int in_stride,
|
||||
int * factors,
|
||||
const kiss_fft_cfg st
|
||||
)
|
||||
{
|
||||
kiss_fft_cpx * Fout_beg=Fout;
|
||||
const int p=*factors++; /* the radix */
|
||||
const int m=*factors++; /* stage's fft length/p */
|
||||
const kiss_fft_cpx * Fout_end = Fout + p*m;
|
||||
|
||||
#ifdef _OPENMP
|
||||
// use openmp extensions at the
|
||||
// top-level (not recursive)
|
||||
if (fstride==1 && p<=5 && m!=1)
|
||||
{
|
||||
int k;
|
||||
|
||||
// execute the p different work units in different threads
|
||||
# pragma omp parallel for
|
||||
for (k=0;k<p;++k)
|
||||
kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
|
||||
// all threads have joined by this point
|
||||
|
||||
switch (p) {
|
||||
case 2: kf_bfly2(Fout,fstride,st,m); break;
|
||||
case 3: kf_bfly3(Fout,fstride,st,m); break;
|
||||
case 4: kf_bfly4(Fout,fstride,st,m); break;
|
||||
case 5: kf_bfly5(Fout,fstride,st,m); break;
|
||||
default: kf_bfly_generic(Fout,fstride,st,m,p); break;
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (m==1) {
|
||||
do{
|
||||
*Fout = *f;
|
||||
f += fstride*in_stride;
|
||||
}while(++Fout != Fout_end );
|
||||
}else{
|
||||
do{
|
||||
// recursive call:
|
||||
// DFT of size m*p performed by doing
|
||||
// p instances of smaller DFTs of size m,
|
||||
// each one takes a decimated version of the input
|
||||
kf_work( Fout , f, fstride*p, in_stride, factors,st);
|
||||
f += fstride*in_stride;
|
||||
}while( (Fout += m) != Fout_end );
|
||||
}
|
||||
|
||||
Fout=Fout_beg;
|
||||
|
||||
// recombine the p smaller DFTs
|
||||
switch (p) {
|
||||
case 2: kf_bfly2(Fout,fstride,st,m); break;
|
||||
case 3: kf_bfly3(Fout,fstride,st,m); break;
|
||||
case 4: kf_bfly4(Fout,fstride,st,m); break;
|
||||
case 5: kf_bfly5(Fout,fstride,st,m); break;
|
||||
default: kf_bfly_generic(Fout,fstride,st,m,p); break;
|
||||
}
|
||||
}
|
||||
|
||||
/* facbuf is populated by p1,m1,p2,m2, ...
|
||||
where
|
||||
p[i] * m[i] = m[i-1]
|
||||
m0 = n */
|
||||
static
|
||||
void kf_factor(int n,int * facbuf)
|
||||
{
|
||||
int p=4;
|
||||
double floor_sqrt;
|
||||
floor_sqrt = floor( sqrt((double)n) );
|
||||
|
||||
/*factor out powers of 4, powers of 2, then any remaining primes */
|
||||
do {
|
||||
while (n % p) {
|
||||
switch (p) {
|
||||
case 4: p = 2; break;
|
||||
case 2: p = 3; break;
|
||||
default: p += 2; break;
|
||||
}
|
||||
if (p > floor_sqrt)
|
||||
p = n; /* no more factors, skip to end */
|
||||
}
|
||||
n /= p;
|
||||
*facbuf++ = p;
|
||||
*facbuf++ = n;
|
||||
} while (n > 1);
|
||||
}
|
||||
|
||||
/*
|
||||
*
|
||||
* User-callable function to allocate all necessary storage space for the fft.
|
||||
*
|
||||
* The return value is a contiguous block of memory, allocated with malloc. As such,
|
||||
* It can be freed with free(), rather than a kiss_fft-specific function.
|
||||
* */
|
||||
kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
|
||||
{
|
||||
KISS_FFT_ALIGN_CHECK(mem)
|
||||
|
||||
kiss_fft_cfg st=NULL;
|
||||
size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state)
|
||||
+ sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/
|
||||
|
||||
if ( lenmem==NULL ) {
|
||||
st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
|
||||
}else{
|
||||
if (mem != NULL && *lenmem >= memneeded)
|
||||
st = (kiss_fft_cfg)mem;
|
||||
*lenmem = memneeded;
|
||||
}
|
||||
if (st) {
|
||||
int i;
|
||||
st->nfft=nfft;
|
||||
st->inverse = inverse_fft;
|
||||
|
||||
for (i=0;i<nfft;++i) {
|
||||
const double pi=3.141592653589793238462643383279502884197169399375105820974944;
|
||||
double phase = -2*pi*i / nfft;
|
||||
if (st->inverse)
|
||||
phase *= -1;
|
||||
kf_cexp(st->twiddles+i, phase );
|
||||
}
|
||||
|
||||
kf_factor(nfft,st->factors);
|
||||
}
|
||||
return st;
|
||||
}
|
||||
|
||||
|
||||
void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
|
||||
{
|
||||
if (fin == fout) {
|
||||
//NOTE: this is not really an in-place FFT algorithm.
|
||||
//It just performs an out-of-place FFT into a temp buffer
|
||||
if (fout == NULL){
|
||||
KISS_FFT_ERROR("fout buffer NULL.");
|
||||
return;
|
||||
}
|
||||
|
||||
kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
|
||||
if (tmpbuf == NULL){
|
||||
KISS_FFT_ERROR("Memory allocation error.");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
|
||||
kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
|
||||
memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
|
||||
KISS_FFT_TMP_FREE(tmpbuf);
|
||||
}else{
|
||||
kf_work( fout, fin, 1,in_stride, st->factors,st );
|
||||
}
|
||||
}
|
||||
|
||||
void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
||||
{
|
||||
kiss_fft_stride(cfg,fin,fout,1);
|
||||
}
|
||||
|
||||
|
||||
void kiss_fft_cleanup(void)
|
||||
{
|
||||
// nothing needed any more
|
||||
}
|
||||
|
||||
int kiss_fft_next_fast_size(int n)
|
||||
{
|
||||
while(1) {
|
||||
int m=n;
|
||||
while ( (m%2) == 0 ) m/=2;
|
||||
while ( (m%3) == 0 ) m/=3;
|
||||
while ( (m%5) == 0 ) m/=5;
|
||||
if (m<=1)
|
||||
break; /* n is completely factorable by twos, threes, and fives */
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
160
src/ext/kissfft/kiss_fft.h
Normal file
160
src/ext/kissfft/kiss_fft.h
Normal file
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
#ifndef KISS_FFT_H
|
||||
#define KISS_FFT_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
// Define KISS_FFT_SHARED macro to properly export symbols
|
||||
#ifdef KISS_FFT_SHARED
|
||||
# ifdef _WIN32
|
||||
# ifdef KISS_FFT_BUILD
|
||||
# define KISS_FFT_API __declspec(dllexport)
|
||||
# else
|
||||
# define KISS_FFT_API __declspec(dllimport)
|
||||
# endif
|
||||
# else
|
||||
# define KISS_FFT_API __attribute__ ((visibility ("default")))
|
||||
# endif
|
||||
#else
|
||||
# define KISS_FFT_API
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
ATTENTION!
|
||||
If you would like a :
|
||||
-- a utility that will handle the caching of fft objects
|
||||
-- real-only (no imaginary time component ) FFT
|
||||
-- a multi-dimensional FFT
|
||||
-- a command-line utility to perform ffts
|
||||
-- a command-line utility to perform fast-convolution filtering
|
||||
|
||||
Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
|
||||
in the tools/ directory.
|
||||
*/
|
||||
|
||||
/* User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE. */
|
||||
#ifdef USE_SIMD
|
||||
# include <xmmintrin.h>
|
||||
# define kiss_fft_scalar __m128
|
||||
# ifndef KISS_FFT_MALLOC
|
||||
# define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
|
||||
# define KISS_FFT_ALIGN_CHECK(ptr)
|
||||
# define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL)
|
||||
# endif
|
||||
# ifndef KISS_FFT_FREE
|
||||
# define KISS_FFT_FREE _mm_free
|
||||
# endif
|
||||
#else
|
||||
# define KISS_FFT_ALIGN_CHECK(ptr)
|
||||
# define KISS_FFT_ALIGN_SIZE_UP(size) (size)
|
||||
# ifndef KISS_FFT_MALLOC
|
||||
# define KISS_FFT_MALLOC malloc
|
||||
# endif
|
||||
# ifndef KISS_FFT_FREE
|
||||
# define KISS_FFT_FREE free
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#include <stdint.h>
|
||||
# if (FIXED_POINT == 32)
|
||||
# define kiss_fft_scalar int32_t
|
||||
# else
|
||||
# define kiss_fft_scalar int16_t
|
||||
# endif
|
||||
#else
|
||||
# ifndef kiss_fft_scalar
|
||||
/* default is float */
|
||||
# define kiss_fft_scalar float
|
||||
# endif
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
kiss_fft_scalar r;
|
||||
kiss_fft_scalar i;
|
||||
}kiss_fft_cpx;
|
||||
|
||||
typedef struct kiss_fft_state* kiss_fft_cfg;
|
||||
|
||||
/*
|
||||
* kiss_fft_alloc
|
||||
*
|
||||
* Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
|
||||
*
|
||||
* typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
|
||||
*
|
||||
* The return value from fft_alloc is a cfg buffer used internally
|
||||
* by the fft routine or NULL.
|
||||
*
|
||||
* If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
|
||||
* The returned value should be free()d when done to avoid memory leaks.
|
||||
*
|
||||
* The state can be placed in a user supplied buffer 'mem':
|
||||
* If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
|
||||
* then the function places the cfg in mem and the size used in *lenmem
|
||||
* and returns mem.
|
||||
*
|
||||
* If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
|
||||
* then the function returns NULL and places the minimum cfg
|
||||
* buffer size in *lenmem.
|
||||
* */
|
||||
|
||||
kiss_fft_cfg KISS_FFT_API kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
|
||||
|
||||
/*
|
||||
* kiss_fft(cfg,in_out_buf)
|
||||
*
|
||||
* Perform an FFT on a complex input buffer.
|
||||
* for a forward FFT,
|
||||
* fin should be f[0] , f[1] , ... ,f[nfft-1]
|
||||
* fout will be F[0] , F[1] , ... ,F[nfft-1]
|
||||
* Note that each element is complex and can be accessed like
|
||||
f[k].r and f[k].i
|
||||
* */
|
||||
void KISS_FFT_API kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
|
||||
|
||||
/*
|
||||
A more generic version of the above function. It reads its input from every Nth sample.
|
||||
* */
|
||||
void KISS_FFT_API kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
|
||||
|
||||
/* If kiss_fft_alloc allocated a buffer, it is one contiguous
|
||||
buffer and can be simply free()d when no longer needed*/
|
||||
#define kiss_fft_free KISS_FFT_FREE
|
||||
|
||||
/*
|
||||
Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up
|
||||
your compiler output to call this before you exit.
|
||||
*/
|
||||
void KISS_FFT_API kiss_fft_cleanup(void);
|
||||
|
||||
|
||||
/*
|
||||
* Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
|
||||
*/
|
||||
int KISS_FFT_API kiss_fft_next_fast_size(int n);
|
||||
|
||||
/* for real ffts, we need an even size */
|
||||
#define kiss_fftr_next_fast_size_real(n) \
|
||||
(kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
36
src/ext/kissfft/kiss_fft_log.h
Normal file
36
src/ext/kissfft/kiss_fft_log.h
Normal file
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
#ifndef kiss_fft_log_h
|
||||
#define kiss_fft_log_h
|
||||
|
||||
#define ERROR 1
|
||||
#define WARNING 2
|
||||
#define INFO 3
|
||||
#define DEBUG 4
|
||||
|
||||
#define STRINGIFY(x) #x
|
||||
#define TOSTRING(x) STRINGIFY(x)
|
||||
|
||||
#if defined(NDEBUG)
|
||||
# define KISS_FFT_LOG_MSG(severity, ...) ((void)0)
|
||||
#else
|
||||
# define KISS_FFT_LOG_MSG(severity, ...) \
|
||||
fprintf(stderr, "[" #severity "] " __FILE__ ":" TOSTRING(__LINE__) " "); \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
fprintf(stderr, "\n")
|
||||
#endif
|
||||
|
||||
#define KISS_FFT_ERROR(...) KISS_FFT_LOG_MSG(ERROR, __VA_ARGS__)
|
||||
#define KISS_FFT_WARNING(...) KISS_FFT_LOG_MSG(WARNING, __VA_ARGS__)
|
||||
#define KISS_FFT_INFO(...) KISS_FFT_LOG_MSG(INFO, __VA_ARGS__)
|
||||
#define KISS_FFT_DEBUG(...) KISS_FFT_LOG_MSG(DEBUG, __VA_ARGS__)
|
||||
|
||||
|
||||
|
||||
#endif /* kiss_fft_log_h */
|
||||
155
src/ext/kissfft/kiss_fftr.c
Normal file
155
src/ext/kissfft/kiss_fftr.c
Normal file
@@ -0,0 +1,155 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
#include "kiss_fftr.h"
|
||||
#include "_kiss_fft_guts.h"
|
||||
|
||||
struct kiss_fftr_state{
|
||||
kiss_fft_cfg substate;
|
||||
kiss_fft_cpx * tmpbuf;
|
||||
kiss_fft_cpx * super_twiddles;
|
||||
#ifdef USE_SIMD
|
||||
void * pad;
|
||||
#endif
|
||||
};
|
||||
|
||||
kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
|
||||
{
|
||||
KISS_FFT_ALIGN_CHECK(mem)
|
||||
|
||||
int i;
|
||||
kiss_fftr_cfg st = NULL;
|
||||
size_t subsize = 0, memneeded;
|
||||
|
||||
if (nfft & 1) {
|
||||
KISS_FFT_ERROR("Real FFT optimization must be even.");
|
||||
return NULL;
|
||||
}
|
||||
nfft >>= 1;
|
||||
|
||||
kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
|
||||
memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
|
||||
|
||||
if (lenmem == NULL) {
|
||||
st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
|
||||
} else {
|
||||
if (*lenmem >= memneeded)
|
||||
st = (kiss_fftr_cfg) mem;
|
||||
*lenmem = memneeded;
|
||||
}
|
||||
if (!st)
|
||||
return NULL;
|
||||
|
||||
st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
|
||||
st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
|
||||
st->super_twiddles = st->tmpbuf + nfft;
|
||||
kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
|
||||
|
||||
for (i = 0; i < nfft/2; ++i) {
|
||||
double phase =
|
||||
-3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
|
||||
if (inverse_fft)
|
||||
phase *= -1;
|
||||
kf_cexp (st->super_twiddles+i,phase);
|
||||
}
|
||||
return st;
|
||||
}
|
||||
|
||||
void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
|
||||
{
|
||||
/* input buffer timedata is stored row-wise */
|
||||
int k,ncfft;
|
||||
kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
|
||||
|
||||
if ( st->substate->inverse) {
|
||||
KISS_FFT_ERROR("kiss fft usage error: improper alloc");
|
||||
return;/* The caller did not call the correct function */
|
||||
}
|
||||
|
||||
ncfft = st->substate->nfft;
|
||||
|
||||
/*perform the parallel fft of two real signals packed in real,imag*/
|
||||
kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
|
||||
/* The real part of the DC element of the frequency spectrum in st->tmpbuf
|
||||
* contains the sum of the even-numbered elements of the input time sequence
|
||||
* The imag part is the sum of the odd-numbered elements
|
||||
*
|
||||
* The sum of tdc.r and tdc.i is the sum of the input time sequence.
|
||||
* yielding DC of input time sequence
|
||||
* The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
|
||||
* yielding Nyquist bin of input time sequence
|
||||
*/
|
||||
|
||||
tdc.r = st->tmpbuf[0].r;
|
||||
tdc.i = st->tmpbuf[0].i;
|
||||
C_FIXDIV(tdc,2);
|
||||
CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
|
||||
CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
|
||||
freqdata[0].r = tdc.r + tdc.i;
|
||||
freqdata[ncfft].r = tdc.r - tdc.i;
|
||||
#ifdef USE_SIMD
|
||||
freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
|
||||
#else
|
||||
freqdata[ncfft].i = freqdata[0].i = 0;
|
||||
#endif
|
||||
|
||||
for ( k=1;k <= ncfft/2 ; ++k ) {
|
||||
fpk = st->tmpbuf[k];
|
||||
fpnk.r = st->tmpbuf[ncfft-k].r;
|
||||
fpnk.i = - st->tmpbuf[ncfft-k].i;
|
||||
C_FIXDIV(fpk,2);
|
||||
C_FIXDIV(fpnk,2);
|
||||
|
||||
C_ADD( f1k, fpk , fpnk );
|
||||
C_SUB( f2k, fpk , fpnk );
|
||||
C_MUL( tw , f2k , st->super_twiddles[k-1]);
|
||||
|
||||
freqdata[k].r = HALF_OF(f1k.r + tw.r);
|
||||
freqdata[k].i = HALF_OF(f1k.i + tw.i);
|
||||
freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
|
||||
freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
|
||||
}
|
||||
}
|
||||
|
||||
void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
|
||||
{
|
||||
/* input buffer timedata is stored row-wise */
|
||||
int k, ncfft;
|
||||
|
||||
if (st->substate->inverse == 0) {
|
||||
KISS_FFT_ERROR("kiss fft usage error: improper alloc");
|
||||
return;/* The caller did not call the correct function */
|
||||
}
|
||||
|
||||
ncfft = st->substate->nfft;
|
||||
|
||||
st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
|
||||
st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
|
||||
C_FIXDIV(st->tmpbuf[0],2);
|
||||
|
||||
for (k = 1; k <= ncfft / 2; ++k) {
|
||||
kiss_fft_cpx fk, fnkc, fek, fok, tmp;
|
||||
fk = freqdata[k];
|
||||
fnkc.r = freqdata[ncfft - k].r;
|
||||
fnkc.i = -freqdata[ncfft - k].i;
|
||||
C_FIXDIV( fk , 2 );
|
||||
C_FIXDIV( fnkc , 2 );
|
||||
|
||||
C_ADD (fek, fk, fnkc);
|
||||
C_SUB (tmp, fk, fnkc);
|
||||
C_MUL (fok, tmp, st->super_twiddles[k-1]);
|
||||
C_ADD (st->tmpbuf[k], fek, fok);
|
||||
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
|
||||
#ifdef USE_SIMD
|
||||
st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
|
||||
#else
|
||||
st->tmpbuf[ncfft - k].i *= -1;
|
||||
#endif
|
||||
}
|
||||
kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
|
||||
}
|
||||
54
src/ext/kissfft/kiss_fftr.h
Normal file
54
src/ext/kissfft/kiss_fftr.h
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
|
||||
* This file is part of KISS FFT - https://github.com/mborgerding/kissfft
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
* See COPYING file for more information.
|
||||
*/
|
||||
|
||||
#ifndef KISS_FTR_H
|
||||
#define KISS_FTR_H
|
||||
|
||||
#include "kiss_fft.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
|
||||
Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
|
||||
|
||||
|
||||
|
||||
*/
|
||||
|
||||
typedef struct kiss_fftr_state *kiss_fftr_cfg;
|
||||
|
||||
|
||||
kiss_fftr_cfg KISS_FFT_API kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
|
||||
/*
|
||||
nfft must be even
|
||||
|
||||
If you don't care to allocate space, use mem = lenmem = NULL
|
||||
*/
|
||||
|
||||
|
||||
void KISS_FFT_API kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
|
||||
/*
|
||||
input timedata has nfft scalar points
|
||||
output freqdata has nfft/2+1 complex points
|
||||
*/
|
||||
|
||||
void KISS_FFT_API kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
|
||||
/*
|
||||
input freqdata has nfft/2+1 complex points
|
||||
output timedata has nfft scalar points
|
||||
*/
|
||||
|
||||
#define kiss_fftr_free KISS_FFT_FREE
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
301
src/ext/pommier/neon_mathfun.h
Normal file
301
src/ext/pommier/neon_mathfun.h
Normal file
@@ -0,0 +1,301 @@
|
||||
/* NEON implementation of sin, cos, exp and log
|
||||
|
||||
Inspired by Intel Approximate Math library, and based on the
|
||||
corresponding algorithms of the cephes math library
|
||||
*/
|
||||
|
||||
/* Copyright (C) 2011 Julien Pommier
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
(this is the zlib license)
|
||||
*/
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
typedef float32x4_t v4sf; // vector of 4 float
|
||||
typedef uint32x4_t v4su; // vector of 4 uint32
|
||||
typedef int32x4_t v4si; // vector of 4 uint32
|
||||
|
||||
#define c_inv_mant_mask ~0x7f800000u
|
||||
#define c_cephes_SQRTHF 0.707106781186547524
|
||||
#define c_cephes_log_p0 7.0376836292E-2
|
||||
#define c_cephes_log_p1 - 1.1514610310E-1
|
||||
#define c_cephes_log_p2 1.1676998740E-1
|
||||
#define c_cephes_log_p3 - 1.2420140846E-1
|
||||
#define c_cephes_log_p4 + 1.4249322787E-1
|
||||
#define c_cephes_log_p5 - 1.6668057665E-1
|
||||
#define c_cephes_log_p6 + 2.0000714765E-1
|
||||
#define c_cephes_log_p7 - 2.4999993993E-1
|
||||
#define c_cephes_log_p8 + 3.3333331174E-1
|
||||
#define c_cephes_log_q1 -2.12194440e-4
|
||||
#define c_cephes_log_q2 0.693359375
|
||||
|
||||
/* natural logarithm computed for 4 simultaneous float
|
||||
return NaN for x <= 0
|
||||
*/
|
||||
v4sf log_ps(v4sf x) {
|
||||
v4sf one = vdupq_n_f32(1);
|
||||
|
||||
x = vmaxq_f32(x, vdupq_n_f32(0)); /* force flush to zero on denormal values */
|
||||
v4su invalid_mask = vcleq_f32(x, vdupq_n_f32(0));
|
||||
|
||||
v4si ux = vreinterpretq_s32_f32(x);
|
||||
|
||||
v4si emm0 = vshrq_n_s32(ux, 23);
|
||||
|
||||
/* keep only the fractional part */
|
||||
ux = vandq_s32(ux, vdupq_n_s32(c_inv_mant_mask));
|
||||
ux = vorrq_s32(ux, vreinterpretq_s32_f32(vdupq_n_f32(0.5f)));
|
||||
x = vreinterpretq_f32_s32(ux);
|
||||
|
||||
emm0 = vsubq_s32(emm0, vdupq_n_s32(0x7f));
|
||||
v4sf e = vcvtq_f32_s32(emm0);
|
||||
|
||||
e = vaddq_f32(e, one);
|
||||
|
||||
/* part2:
|
||||
if( x < SQRTHF ) {
|
||||
e -= 1;
|
||||
x = x + x - 1.0;
|
||||
} else { x = x - 1.0; }
|
||||
*/
|
||||
v4su mask = vcltq_f32(x, vdupq_n_f32(c_cephes_SQRTHF));
|
||||
v4sf tmp = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(x), mask));
|
||||
x = vsubq_f32(x, one);
|
||||
e = vsubq_f32(e, vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(one), mask)));
|
||||
x = vaddq_f32(x, tmp);
|
||||
|
||||
v4sf z = vmulq_f32(x,x);
|
||||
|
||||
v4sf y = vdupq_n_f32(c_cephes_log_p0);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p1));
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p2));
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p3));
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p4));
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p5));
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p6));
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p7));
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, vdupq_n_f32(c_cephes_log_p8));
|
||||
y = vmulq_f32(y, x);
|
||||
|
||||
y = vmulq_f32(y, z);
|
||||
|
||||
|
||||
tmp = vmulq_f32(e, vdupq_n_f32(c_cephes_log_q1));
|
||||
y = vaddq_f32(y, tmp);
|
||||
|
||||
|
||||
tmp = vmulq_f32(z, vdupq_n_f32(0.5f));
|
||||
y = vsubq_f32(y, tmp);
|
||||
|
||||
tmp = vmulq_f32(e, vdupq_n_f32(c_cephes_log_q2));
|
||||
x = vaddq_f32(x, y);
|
||||
x = vaddq_f32(x, tmp);
|
||||
x = vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(x), invalid_mask)); // negative arg will be NAN
|
||||
return x;
|
||||
}
|
||||
|
||||
#define c_exp_hi 88.3762626647949f
|
||||
#define c_exp_lo -88.3762626647949f
|
||||
|
||||
#define c_cephes_LOG2EF 1.44269504088896341
|
||||
#define c_cephes_exp_C1 0.693359375
|
||||
#define c_cephes_exp_C2 -2.12194440e-4
|
||||
|
||||
#define c_cephes_exp_p0 1.9875691500E-4
|
||||
#define c_cephes_exp_p1 1.3981999507E-3
|
||||
#define c_cephes_exp_p2 8.3334519073E-3
|
||||
#define c_cephes_exp_p3 4.1665795894E-2
|
||||
#define c_cephes_exp_p4 1.6666665459E-1
|
||||
#define c_cephes_exp_p5 5.0000001201E-1
|
||||
|
||||
/* exp() computed for 4 float at once */
|
||||
v4sf exp_ps(v4sf x) {
|
||||
v4sf tmp, fx;
|
||||
|
||||
v4sf one = vdupq_n_f32(1);
|
||||
x = vminq_f32(x, vdupq_n_f32(c_exp_hi));
|
||||
x = vmaxq_f32(x, vdupq_n_f32(c_exp_lo));
|
||||
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = vmlaq_f32(vdupq_n_f32(0.5f), x, vdupq_n_f32(c_cephes_LOG2EF));
|
||||
|
||||
/* perform a floorf */
|
||||
tmp = vcvtq_f32_s32(vcvtq_s32_f32(fx));
|
||||
|
||||
/* if greater, substract 1 */
|
||||
v4su mask = vcgtq_f32(tmp, fx);
|
||||
mask = vandq_u32(mask, vreinterpretq_u32_f32(one));
|
||||
|
||||
|
||||
fx = vsubq_f32(tmp, vreinterpretq_f32_u32(mask));
|
||||
|
||||
tmp = vmulq_f32(fx, vdupq_n_f32(c_cephes_exp_C1));
|
||||
v4sf z = vmulq_f32(fx, vdupq_n_f32(c_cephes_exp_C2));
|
||||
x = vsubq_f32(x, tmp);
|
||||
x = vsubq_f32(x, z);
|
||||
|
||||
static const float32_t cephes_exp_p[6] = { c_cephes_exp_p0, c_cephes_exp_p1, c_cephes_exp_p2, c_cephes_exp_p3, c_cephes_exp_p4, c_cephes_exp_p5 };
|
||||
v4sf y = vld1q_dup_f32(cephes_exp_p+0);
|
||||
v4sf c1 = vld1q_dup_f32(cephes_exp_p+1);
|
||||
v4sf c2 = vld1q_dup_f32(cephes_exp_p+2);
|
||||
v4sf c3 = vld1q_dup_f32(cephes_exp_p+3);
|
||||
v4sf c4 = vld1q_dup_f32(cephes_exp_p+4);
|
||||
v4sf c5 = vld1q_dup_f32(cephes_exp_p+5);
|
||||
|
||||
y = vmulq_f32(y, x);
|
||||
z = vmulq_f32(x,x);
|
||||
y = vaddq_f32(y, c1);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, c2);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, c3);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, c4);
|
||||
y = vmulq_f32(y, x);
|
||||
y = vaddq_f32(y, c5);
|
||||
|
||||
y = vmulq_f32(y, z);
|
||||
y = vaddq_f32(y, x);
|
||||
y = vaddq_f32(y, one);
|
||||
|
||||
/* build 2^n */
|
||||
int32x4_t mm;
|
||||
mm = vcvtq_s32_f32(fx);
|
||||
mm = vaddq_s32(mm, vdupq_n_s32(0x7f));
|
||||
mm = vshlq_n_s32(mm, 23);
|
||||
v4sf pow2n = vreinterpretq_f32_s32(mm);
|
||||
|
||||
y = vmulq_f32(y, pow2n);
|
||||
return y;
|
||||
}
|
||||
|
||||
#define c_minus_cephes_DP1 -0.78515625
|
||||
#define c_minus_cephes_DP2 -2.4187564849853515625e-4
|
||||
#define c_minus_cephes_DP3 -3.77489497744594108e-8
|
||||
#define c_sincof_p0 -1.9515295891E-4
|
||||
#define c_sincof_p1 8.3321608736E-3
|
||||
#define c_sincof_p2 -1.6666654611E-1
|
||||
#define c_coscof_p0 2.443315711809948E-005
|
||||
#define c_coscof_p1 -1.388731625493765E-003
|
||||
#define c_coscof_p2 4.166664568298827E-002
|
||||
#define c_cephes_FOPI 1.27323954473516 // 4 / M_PI
|
||||
|
||||
/* evaluation of 4 sines & cosines at once.
|
||||
|
||||
The code is the exact rewriting of the cephes sinf function.
|
||||
Precision is excellent as long as x < 8192 (I did not bother to
|
||||
take into account the special handling they have for greater values
|
||||
-- it does not return garbage for arguments over 8192, though, but
|
||||
the extra precision is missing).
|
||||
|
||||
Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
|
||||
surprising but correct result.
|
||||
|
||||
Note also that when you compute sin(x), cos(x) is available at
|
||||
almost no extra price so both sin_ps and cos_ps make use of
|
||||
sincos_ps..
|
||||
*/
|
||||
void sincos_ps(v4sf x, v4sf *ysin, v4sf *ycos) { // any x
|
||||
v4sf xmm1, xmm2, xmm3, y;
|
||||
|
||||
v4su emm2;
|
||||
|
||||
v4su sign_mask_sin, sign_mask_cos;
|
||||
sign_mask_sin = vcltq_f32(x, vdupq_n_f32(0));
|
||||
x = vabsq_f32(x);
|
||||
|
||||
/* scale by 4/Pi */
|
||||
y = vmulq_f32(x, vdupq_n_f32(c_cephes_FOPI));
|
||||
|
||||
/* store the integer part of y in mm0 */
|
||||
emm2 = vcvtq_u32_f32(y);
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
emm2 = vaddq_u32(emm2, vdupq_n_u32(1));
|
||||
emm2 = vandq_u32(emm2, vdupq_n_u32(~1));
|
||||
y = vcvtq_f32_u32(emm2);
|
||||
|
||||
/* get the polynom selection mask
|
||||
there is one polynom for 0 <= x <= Pi/4
|
||||
and another one for Pi/4<x<=Pi/2
|
||||
|
||||
Both branches will be computed.
|
||||
*/
|
||||
v4su poly_mask = vtstq_u32(emm2, vdupq_n_u32(2));
|
||||
|
||||
/* The magic pass: "Extended precision modular arithmetic"
|
||||
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||
xmm1 = vmulq_n_f32(y, c_minus_cephes_DP1);
|
||||
xmm2 = vmulq_n_f32(y, c_minus_cephes_DP2);
|
||||
xmm3 = vmulq_n_f32(y, c_minus_cephes_DP3);
|
||||
x = vaddq_f32(x, xmm1);
|
||||
x = vaddq_f32(x, xmm2);
|
||||
x = vaddq_f32(x, xmm3);
|
||||
|
||||
sign_mask_sin = veorq_u32(sign_mask_sin, vtstq_u32(emm2, vdupq_n_u32(4)));
|
||||
sign_mask_cos = vtstq_u32(vsubq_u32(emm2, vdupq_n_u32(2)), vdupq_n_u32(4));
|
||||
|
||||
/* Evaluate the first polynom (0 <= x <= Pi/4) in y1,
|
||||
and the second polynom (Pi/4 <= x <= 0) in y2 */
|
||||
v4sf z = vmulq_f32(x,x);
|
||||
v4sf y1, y2;
|
||||
|
||||
y1 = vmulq_n_f32(z, c_coscof_p0);
|
||||
y2 = vmulq_n_f32(z, c_sincof_p0);
|
||||
y1 = vaddq_f32(y1, vdupq_n_f32(c_coscof_p1));
|
||||
y2 = vaddq_f32(y2, vdupq_n_f32(c_sincof_p1));
|
||||
y1 = vmulq_f32(y1, z);
|
||||
y2 = vmulq_f32(y2, z);
|
||||
y1 = vaddq_f32(y1, vdupq_n_f32(c_coscof_p2));
|
||||
y2 = vaddq_f32(y2, vdupq_n_f32(c_sincof_p2));
|
||||
y1 = vmulq_f32(y1, z);
|
||||
y2 = vmulq_f32(y2, z);
|
||||
y1 = vmulq_f32(y1, z);
|
||||
y2 = vmulq_f32(y2, x);
|
||||
y1 = vsubq_f32(y1, vmulq_f32(z, vdupq_n_f32(0.5f)));
|
||||
y2 = vaddq_f32(y2, x);
|
||||
y1 = vaddq_f32(y1, vdupq_n_f32(1));
|
||||
|
||||
/* select the correct result from the two polynoms */
|
||||
v4sf ys = vbslq_f32(poly_mask, y1, y2);
|
||||
v4sf yc = vbslq_f32(poly_mask, y2, y1);
|
||||
*ysin = vbslq_f32(sign_mask_sin, vnegq_f32(ys), ys);
|
||||
*ycos = vbslq_f32(sign_mask_cos, yc, vnegq_f32(yc));
|
||||
}
|
||||
|
||||
v4sf sin_ps(v4sf x) {
|
||||
v4sf ysin, ycos;
|
||||
sincos_ps(x, &ysin, &ycos);
|
||||
return ysin;
|
||||
}
|
||||
|
||||
v4sf cos_ps(v4sf x) {
|
||||
v4sf ysin, ycos;
|
||||
sincos_ps(x, &ysin, &ycos);
|
||||
return ycos;
|
||||
}
|
||||
|
||||
|
||||
766
src/ext/pommier/sse_mathfun.h
Normal file
766
src/ext/pommier/sse_mathfun.h
Normal file
@@ -0,0 +1,766 @@
|
||||
|
||||
#ifndef POMMIER_SSE_MATHFUN_H
|
||||
#define POMMIER_SSE_MATHFUN_H
|
||||
|
||||
/* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
|
||||
|
||||
Inspired by Intel Approximate Math library, and based on the
|
||||
corresponding algorithms of the cephes math library
|
||||
|
||||
The default is to use the SSE1 version. If you define USE_SSE2 the
|
||||
the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
|
||||
not expect any significant performance improvement with SSE2.
|
||||
*/
|
||||
|
||||
/* Copyright (C) 2007 Julien Pommier
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
(this is the zlib license)
|
||||
*/
|
||||
|
||||
#include <xmmintrin.h>
|
||||
|
||||
/* yes I know, the top of this file is quite ugly */
|
||||
|
||||
#ifdef _MSC_VER /* visual c++ */
|
||||
# define ALIGN16_BEG __declspec(align(16))
|
||||
# define ALIGN16_END
|
||||
#else /* gcc or icc */
|
||||
# define ALIGN16_BEG
|
||||
# define ALIGN16_END __attribute__((aligned(16)))
|
||||
#endif
|
||||
|
||||
/* __m128 is ugly to write */
|
||||
typedef __m128 v4sf; // vector of 4 float (sse1)
|
||||
|
||||
#ifdef USE_SSE2
|
||||
# include <emmintrin.h>
|
||||
typedef __m128i v4si; // vector of 4 int (sse2)
|
||||
#else
|
||||
typedef __m64 v2si; // vector of 2 int (mmx)
|
||||
#endif
|
||||
|
||||
/* declare some SSE constants -- why can't I figure a better way to do that? */
|
||||
#define _PS_CONST(Name, Val) \
|
||||
static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
|
||||
#define _PI32_CONST(Name, Val) \
|
||||
static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
|
||||
#define _PS_CONST_TYPE(Name, Type, Val) \
|
||||
static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
|
||||
|
||||
_PS_CONST(1 , 1.0f);
|
||||
_PS_CONST(0p5, 0.5f);
|
||||
/* the smallest non denormalized float number */
|
||||
_PS_CONST_TYPE(min_norm_pos, int, 0x00800000);
|
||||
_PS_CONST_TYPE(mant_mask, int, 0x7f800000);
|
||||
_PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000);
|
||||
|
||||
_PS_CONST_TYPE(sign_mask, int, (int)0x80000000);
|
||||
_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
|
||||
|
||||
_PI32_CONST(1, 1);
|
||||
_PI32_CONST(inv1, ~1);
|
||||
_PI32_CONST(2, 2);
|
||||
_PI32_CONST(4, 4);
|
||||
_PI32_CONST(0x7f, 0x7f);
|
||||
|
||||
_PS_CONST(cephes_SQRTHF, 0.707106781186547524);
|
||||
_PS_CONST(cephes_log_p0, 7.0376836292E-2);
|
||||
_PS_CONST(cephes_log_p1, - 1.1514610310E-1);
|
||||
_PS_CONST(cephes_log_p2, 1.1676998740E-1);
|
||||
_PS_CONST(cephes_log_p3, - 1.2420140846E-1);
|
||||
_PS_CONST(cephes_log_p4, + 1.4249322787E-1);
|
||||
_PS_CONST(cephes_log_p5, - 1.6668057665E-1);
|
||||
_PS_CONST(cephes_log_p6, + 2.0000714765E-1);
|
||||
_PS_CONST(cephes_log_p7, - 2.4999993993E-1);
|
||||
_PS_CONST(cephes_log_p8, + 3.3333331174E-1);
|
||||
_PS_CONST(cephes_log_q1, -2.12194440e-4);
|
||||
_PS_CONST(cephes_log_q2, 0.693359375);
|
||||
|
||||
#if defined (__MINGW32__)
|
||||
|
||||
/* the ugly part below: many versions of gcc used to be completely buggy with respect to some intrinsics
|
||||
The movehl_ps is fixed in mingw 3.4.5, but I found out that all the _mm_cmp* intrinsics were completely
|
||||
broken on my mingw gcc 3.4.5 ...
|
||||
|
||||
Note that the bug on _mm_cmp* does occur only at -O0 optimization level
|
||||
*/
|
||||
|
||||
inline __m128 my_movehl_ps(__m128 a, const __m128 b) {
|
||||
asm (
|
||||
"movhlps %2,%0\n\t"
|
||||
: "=x" (a)
|
||||
: "0" (a), "x"(b)
|
||||
);
|
||||
return a; }
|
||||
#warning "redefined _mm_movehl_ps (see gcc bug 21179)"
|
||||
#define _mm_movehl_ps my_movehl_ps
|
||||
|
||||
inline __m128 my_cmplt_ps(__m128 a, const __m128 b) {
|
||||
asm (
|
||||
"cmpltps %2,%0\n\t"
|
||||
: "=x" (a)
|
||||
: "0" (a), "x"(b)
|
||||
);
|
||||
return a;
|
||||
}
|
||||
inline __m128 my_cmpgt_ps(__m128 a, const __m128 b) {
|
||||
asm (
|
||||
"cmpnleps %2,%0\n\t"
|
||||
: "=x" (a)
|
||||
: "0" (a), "x"(b)
|
||||
);
|
||||
return a;
|
||||
}
|
||||
inline __m128 my_cmpeq_ps(__m128 a, const __m128 b) {
|
||||
asm (
|
||||
"cmpeqps %2,%0\n\t"
|
||||
: "=x" (a)
|
||||
: "0" (a), "x"(b)
|
||||
);
|
||||
return a;
|
||||
}
|
||||
#warning "redefined _mm_cmpxx_ps functions..."
|
||||
#define _mm_cmplt_ps my_cmplt_ps
|
||||
#define _mm_cmpgt_ps my_cmpgt_ps
|
||||
#define _mm_cmpeq_ps my_cmpeq_ps
|
||||
#endif
|
||||
|
||||
#ifndef USE_SSE2
|
||||
typedef union xmm_mm_union {
|
||||
__m128 xmm;
|
||||
__m64 mm[2];
|
||||
} xmm_mm_union;
|
||||
|
||||
#define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) { \
|
||||
xmm_mm_union u; u.xmm = xmm_; \
|
||||
mm0_ = u.mm[0]; \
|
||||
mm1_ = u.mm[1]; \
|
||||
}
|
||||
|
||||
#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { \
|
||||
xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; \
|
||||
}
|
||||
|
||||
#endif // USE_SSE2
|
||||
|
||||
/* natural logarithm computed for 4 simultaneous float
|
||||
return NaN for x <= 0
|
||||
*/
|
||||
v4sf log_ps(v4sf x) {
|
||||
#ifdef USE_SSE2
|
||||
v4si emm0;
|
||||
#else
|
||||
v2si mm0, mm1;
|
||||
#endif
|
||||
v4sf one = *(v4sf*)_ps_1;
|
||||
|
||||
v4sf invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
|
||||
|
||||
x = _mm_max_ps(x, *(v4sf*)_ps_min_norm_pos); /* cut off denormalized stuff */
|
||||
|
||||
#ifndef USE_SSE2
|
||||
/* part 1: x = frexpf(x, &e); */
|
||||
COPY_XMM_TO_MM(x, mm0, mm1);
|
||||
mm0 = _mm_srli_pi32(mm0, 23);
|
||||
mm1 = _mm_srli_pi32(mm1, 23);
|
||||
#else
|
||||
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
|
||||
#endif
|
||||
/* keep only the fractional part */
|
||||
x = _mm_and_ps(x, *(v4sf*)_ps_inv_mant_mask);
|
||||
x = _mm_or_ps(x, *(v4sf*)_ps_0p5);
|
||||
|
||||
#ifndef USE_SSE2
|
||||
/* now e=mm0:mm1 contain the really base-2 exponent */
|
||||
mm0 = _mm_sub_pi32(mm0, *(v2si*)_pi32_0x7f);
|
||||
mm1 = _mm_sub_pi32(mm1, *(v2si*)_pi32_0x7f);
|
||||
v4sf e = _mm_cvtpi32x2_ps(mm0, mm1);
|
||||
_mm_empty(); /* bye bye mmx */
|
||||
#else
|
||||
emm0 = _mm_sub_epi32(emm0, *(v4si*)_pi32_0x7f);
|
||||
v4sf e = _mm_cvtepi32_ps(emm0);
|
||||
#endif
|
||||
|
||||
e = _mm_add_ps(e, one);
|
||||
|
||||
/* part2:
|
||||
if( x < SQRTHF ) {
|
||||
e -= 1;
|
||||
x = x + x - 1.0;
|
||||
} else { x = x - 1.0; }
|
||||
*/
|
||||
v4sf mask = _mm_cmplt_ps(x, *(v4sf*)_ps_cephes_SQRTHF);
|
||||
v4sf tmp = _mm_and_ps(x, mask);
|
||||
x = _mm_sub_ps(x, one);
|
||||
e = _mm_sub_ps(e, _mm_and_ps(one, mask));
|
||||
x = _mm_add_ps(x, tmp);
|
||||
|
||||
|
||||
v4sf z = _mm_mul_ps(x,x);
|
||||
|
||||
v4sf y = *(v4sf*)_ps_cephes_log_p0;
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p1);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p2);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p3);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p4);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p5);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p6);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p7);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p8);
|
||||
y = _mm_mul_ps(y, x);
|
||||
|
||||
y = _mm_mul_ps(y, z);
|
||||
|
||||
|
||||
tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q1);
|
||||
y = _mm_add_ps(y, tmp);
|
||||
|
||||
|
||||
tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
|
||||
y = _mm_sub_ps(y, tmp);
|
||||
|
||||
tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q2);
|
||||
x = _mm_add_ps(x, y);
|
||||
x = _mm_add_ps(x, tmp);
|
||||
x = _mm_or_ps(x, invalid_mask); // negative arg will be NAN
|
||||
return x;
|
||||
}
|
||||
|
||||
_PS_CONST(exp_hi, 88.3762626647949f);
|
||||
_PS_CONST(exp_lo, -88.3762626647949f);
|
||||
|
||||
_PS_CONST(cephes_LOG2EF, 1.44269504088896341);
|
||||
_PS_CONST(cephes_exp_C1, 0.693359375);
|
||||
_PS_CONST(cephes_exp_C2, -2.12194440e-4);
|
||||
|
||||
_PS_CONST(cephes_exp_p0, 1.9875691500E-4);
|
||||
_PS_CONST(cephes_exp_p1, 1.3981999507E-3);
|
||||
_PS_CONST(cephes_exp_p2, 8.3334519073E-3);
|
||||
_PS_CONST(cephes_exp_p3, 4.1665795894E-2);
|
||||
_PS_CONST(cephes_exp_p4, 1.6666665459E-1);
|
||||
_PS_CONST(cephes_exp_p5, 5.0000001201E-1);
|
||||
|
||||
v4sf exp_ps(v4sf x) {
|
||||
v4sf tmp = _mm_setzero_ps(), fx;
|
||||
#ifdef USE_SSE2
|
||||
v4si emm0;
|
||||
#else
|
||||
v2si mm0, mm1;
|
||||
#endif
|
||||
v4sf one = *(v4sf*)_ps_1;
|
||||
|
||||
x = _mm_min_ps(x, *(v4sf*)_ps_exp_hi);
|
||||
x = _mm_max_ps(x, *(v4sf*)_ps_exp_lo);
|
||||
|
||||
/* express exp(x) as exp(g + n*log(2)) */
|
||||
fx = _mm_mul_ps(x, *(v4sf*)_ps_cephes_LOG2EF);
|
||||
fx = _mm_add_ps(fx, *(v4sf*)_ps_0p5);
|
||||
|
||||
/* how to perform a floorf with SSE: just below */
|
||||
#ifndef USE_SSE2
|
||||
/* step 1 : cast to int */
|
||||
tmp = _mm_movehl_ps(tmp, fx);
|
||||
mm0 = _mm_cvttps_pi32(fx);
|
||||
mm1 = _mm_cvttps_pi32(tmp);
|
||||
/* step 2 : cast back to float */
|
||||
tmp = _mm_cvtpi32x2_ps(mm0, mm1);
|
||||
#else
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
tmp = _mm_cvtepi32_ps(emm0);
|
||||
#endif
|
||||
/* if greater, substract 1 */
|
||||
v4sf mask = _mm_cmpgt_ps(tmp, fx);
|
||||
mask = _mm_and_ps(mask, one);
|
||||
fx = _mm_sub_ps(tmp, mask);
|
||||
|
||||
tmp = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C1);
|
||||
v4sf z = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C2);
|
||||
x = _mm_sub_ps(x, tmp);
|
||||
x = _mm_sub_ps(x, z);
|
||||
|
||||
z = _mm_mul_ps(x,x);
|
||||
|
||||
v4sf y = *(v4sf*)_ps_cephes_exp_p0;
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p1);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p2);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p3);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p4);
|
||||
y = _mm_mul_ps(y, x);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p5);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, x);
|
||||
y = _mm_add_ps(y, one);
|
||||
|
||||
/* build 2^n */
|
||||
#ifndef USE_SSE2
|
||||
z = _mm_movehl_ps(z, fx);
|
||||
mm0 = _mm_cvttps_pi32(fx);
|
||||
mm1 = _mm_cvttps_pi32(z);
|
||||
mm0 = _mm_add_pi32(mm0, *(v2si*)_pi32_0x7f);
|
||||
mm1 = _mm_add_pi32(mm1, *(v2si*)_pi32_0x7f);
|
||||
mm0 = _mm_slli_pi32(mm0, 23);
|
||||
mm1 = _mm_slli_pi32(mm1, 23);
|
||||
|
||||
v4sf pow2n;
|
||||
COPY_MM_TO_XMM(mm0, mm1, pow2n);
|
||||
_mm_empty();
|
||||
#else
|
||||
emm0 = _mm_cvttps_epi32(fx);
|
||||
emm0 = _mm_add_epi32(emm0, *(v4si*)_pi32_0x7f);
|
||||
emm0 = _mm_slli_epi32(emm0, 23);
|
||||
v4sf pow2n = _mm_castsi128_ps(emm0);
|
||||
#endif
|
||||
y = _mm_mul_ps(y, pow2n);
|
||||
return y;
|
||||
}
|
||||
|
||||
_PS_CONST(minus_cephes_DP1, -0.78515625);
|
||||
_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
|
||||
_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
|
||||
_PS_CONST(sincof_p0, -1.9515295891E-4);
|
||||
_PS_CONST(sincof_p1, 8.3321608736E-3);
|
||||
_PS_CONST(sincof_p2, -1.6666654611E-1);
|
||||
_PS_CONST(coscof_p0, 2.443315711809948E-005);
|
||||
_PS_CONST(coscof_p1, -1.388731625493765E-003);
|
||||
_PS_CONST(coscof_p2, 4.166664568298827E-002);
|
||||
_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
|
||||
|
||||
|
||||
/* evaluation of 4 sines at onces, using only SSE1+MMX intrinsics so
|
||||
it runs also on old athlons XPs and the pentium III of your grand
|
||||
mother.
|
||||
|
||||
The code is the exact rewriting of the cephes sinf function.
|
||||
Precision is excellent as long as x < 8192 (I did not bother to
|
||||
take into account the special handling they have for greater values
|
||||
-- it does not return garbage for arguments over 8192, though, but
|
||||
the extra precision is missing).
|
||||
|
||||
Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
|
||||
surprising but correct result.
|
||||
|
||||
Performance is also surprisingly good, 1.33 times faster than the
|
||||
macos vsinf SSE2 function, and 1.5 times faster than the
|
||||
__vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
|
||||
too bad for an SSE1 function (with no special tuning) !
|
||||
However the latter libraries probably have a much better handling of NaN,
|
||||
Inf, denormalized and other special arguments..
|
||||
|
||||
On my core 1 duo, the execution of this function takes approximately 95 cycles.
|
||||
|
||||
From what I have observed on the experiments with Intel AMath lib, switching to an
|
||||
SSE2 version would improve the perf by only 10%.
|
||||
|
||||
Since it is based on SSE intrinsics, it has to be compiled at -O2 to
|
||||
deliver full speed.
|
||||
*/
|
||||
v4sf sin_ps(v4sf x) { // any x
|
||||
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
|
||||
|
||||
#ifdef USE_SSE2
|
||||
v4si emm0, emm2;
|
||||
#else
|
||||
v2si mm0, mm1, mm2, mm3;
|
||||
#endif
|
||||
sign_bit = x;
|
||||
/* take the absolute value */
|
||||
x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
|
||||
/* extract the sign bit (upper one) */
|
||||
sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);
|
||||
|
||||
/* scale by 4/Pi */
|
||||
y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
|
||||
|
||||
//printf("plop:"); print4(y);
|
||||
#ifdef USE_SSE2
|
||||
/* store the integer part of y in mm0 */
|
||||
emm2 = _mm_cvttps_epi32(y);
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
|
||||
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
|
||||
y = _mm_cvtepi32_ps(emm2);
|
||||
/* get the swap sign flag */
|
||||
emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
|
||||
emm0 = _mm_slli_epi32(emm0, 29);
|
||||
/* get the polynom selection mask
|
||||
there is one polynom for 0 <= x <= Pi/4
|
||||
and another one for Pi/4<x<=Pi/2
|
||||
|
||||
Both branches will be computed.
|
||||
*/
|
||||
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
|
||||
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
|
||||
|
||||
v4sf swap_sign_bit = _mm_castsi128_ps(emm0);
|
||||
v4sf poly_mask = _mm_castsi128_ps(emm2);
|
||||
sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
|
||||
#else
|
||||
/* store the integer part of y in mm0:mm1 */
|
||||
xmm2 = _mm_movehl_ps(xmm2, y);
|
||||
mm2 = _mm_cvttps_pi32(y);
|
||||
mm3 = _mm_cvttps_pi32(xmm2);
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
|
||||
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
|
||||
y = _mm_cvtpi32x2_ps(mm2, mm3);
|
||||
/* get the swap sign flag */
|
||||
mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
|
||||
mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
|
||||
mm0 = _mm_slli_pi32(mm0, 29);
|
||||
mm1 = _mm_slli_pi32(mm1, 29);
|
||||
/* get the polynom selection mask */
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
|
||||
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
|
||||
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
|
||||
v4sf swap_sign_bit, poly_mask;
|
||||
COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit);
|
||||
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
|
||||
sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
|
||||
_mm_empty(); /* good-bye mmx */
|
||||
#endif
|
||||
|
||||
/* The magic pass: "Extended precision modular arithmetic"
|
||||
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
|
||||
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
|
||||
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
|
||||
xmm1 = _mm_mul_ps(y, xmm1);
|
||||
xmm2 = _mm_mul_ps(y, xmm2);
|
||||
xmm3 = _mm_mul_ps(y, xmm3);
|
||||
x = _mm_add_ps(x, xmm1);
|
||||
x = _mm_add_ps(x, xmm2);
|
||||
x = _mm_add_ps(x, xmm3);
|
||||
|
||||
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||
y = *(v4sf*)_ps_coscof_p0;
|
||||
v4sf z = _mm_mul_ps(x,x);
|
||||
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_mul_ps(y, z);
|
||||
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
|
||||
y = _mm_sub_ps(y, tmp);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_1);
|
||||
|
||||
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||
|
||||
v4sf y2 = *(v4sf*)_ps_sincof_p0;
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_mul_ps(y2, x);
|
||||
y2 = _mm_add_ps(y2, x);
|
||||
|
||||
/* select the correct result from the two polynoms */
|
||||
xmm3 = poly_mask;
|
||||
y2 = _mm_and_ps(xmm3, y2); //, xmm3);
|
||||
y = _mm_andnot_ps(xmm3, y);
|
||||
y = _mm_add_ps(y,y2);
|
||||
/* update the sign */
|
||||
y = _mm_xor_ps(y, sign_bit);
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
/* almost the same as sin_ps */
|
||||
v4sf cos_ps(v4sf x) { // any x
|
||||
v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
|
||||
#ifdef USE_SSE2
|
||||
v4si emm0, emm2;
|
||||
#else
|
||||
v2si mm0, mm1, mm2, mm3;
|
||||
#endif
|
||||
/* take the absolute value */
|
||||
x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
|
||||
|
||||
/* scale by 4/Pi */
|
||||
y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
|
||||
|
||||
#ifdef USE_SSE2
|
||||
/* store the integer part of y in mm0 */
|
||||
emm2 = _mm_cvttps_epi32(y);
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
|
||||
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
|
||||
y = _mm_cvtepi32_ps(emm2);
|
||||
|
||||
emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2);
|
||||
|
||||
/* get the swap sign flag */
|
||||
emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4);
|
||||
emm0 = _mm_slli_epi32(emm0, 29);
|
||||
/* get the polynom selection mask */
|
||||
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
|
||||
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
|
||||
|
||||
v4sf sign_bit = _mm_castsi128_ps(emm0);
|
||||
v4sf poly_mask = _mm_castsi128_ps(emm2);
|
||||
#else
|
||||
/* store the integer part of y in mm0:mm1 */
|
||||
xmm2 = _mm_movehl_ps(xmm2, y);
|
||||
mm2 = _mm_cvttps_pi32(y);
|
||||
mm3 = _mm_cvttps_pi32(xmm2);
|
||||
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
|
||||
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
|
||||
|
||||
y = _mm_cvtpi32x2_ps(mm2, mm3);
|
||||
|
||||
|
||||
mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2);
|
||||
mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);
|
||||
|
||||
/* get the swap sign flag in mm0:mm1 and the
|
||||
polynom selection mask in mm2:mm3 */
|
||||
|
||||
mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
|
||||
mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
|
||||
mm0 = _mm_slli_pi32(mm0, 29);
|
||||
mm1 = _mm_slli_pi32(mm1, 29);
|
||||
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
|
||||
|
||||
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
|
||||
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
|
||||
|
||||
v4sf sign_bit, poly_mask;
|
||||
COPY_MM_TO_XMM(mm0, mm1, sign_bit);
|
||||
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
|
||||
_mm_empty(); /* good-bye mmx */
|
||||
#endif
|
||||
/* The magic pass: "Extended precision modular arithmetic"
|
||||
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
|
||||
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
|
||||
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
|
||||
xmm1 = _mm_mul_ps(y, xmm1);
|
||||
xmm2 = _mm_mul_ps(y, xmm2);
|
||||
xmm3 = _mm_mul_ps(y, xmm3);
|
||||
x = _mm_add_ps(x, xmm1);
|
||||
x = _mm_add_ps(x, xmm2);
|
||||
x = _mm_add_ps(x, xmm3);
|
||||
|
||||
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||
y = *(v4sf*)_ps_coscof_p0;
|
||||
v4sf z = _mm_mul_ps(x,x);
|
||||
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_mul_ps(y, z);
|
||||
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
|
||||
y = _mm_sub_ps(y, tmp);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_1);
|
||||
|
||||
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||
|
||||
v4sf y2 = *(v4sf*)_ps_sincof_p0;
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_mul_ps(y2, x);
|
||||
y2 = _mm_add_ps(y2, x);
|
||||
|
||||
/* select the correct result from the two polynoms */
|
||||
xmm3 = poly_mask;
|
||||
y2 = _mm_and_ps(xmm3, y2); //, xmm3);
|
||||
y = _mm_andnot_ps(xmm3, y);
|
||||
y = _mm_add_ps(y,y2);
|
||||
/* update the sign */
|
||||
y = _mm_xor_ps(y, sign_bit);
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
|
||||
it is almost as fast, and gives you a free cosine with your sine */
|
||||
void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
|
||||
v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
|
||||
#ifdef USE_SSE2
|
||||
v4si emm0, emm2, emm4;
|
||||
#else
|
||||
v2si mm0, mm1, mm2, mm3, mm4, mm5;
|
||||
#endif
|
||||
sign_bit_sin = x;
|
||||
/* take the absolute value */
|
||||
x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
|
||||
/* extract the sign bit (upper one) */
|
||||
sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);
|
||||
|
||||
/* scale by 4/Pi */
|
||||
y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
|
||||
|
||||
#ifdef USE_SSE2
|
||||
/* store the integer part of y in emm2 */
|
||||
emm2 = _mm_cvttps_epi32(y);
|
||||
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
|
||||
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
|
||||
y = _mm_cvtepi32_ps(emm2);
|
||||
|
||||
emm4 = emm2;
|
||||
|
||||
/* get the swap sign flag for the sine */
|
||||
emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
|
||||
emm0 = _mm_slli_epi32(emm0, 29);
|
||||
v4sf swap_sign_bit_sin = _mm_castsi128_ps(emm0);
|
||||
|
||||
/* get the polynom selection mask for the sine*/
|
||||
emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
|
||||
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
|
||||
v4sf poly_mask = _mm_castsi128_ps(emm2);
|
||||
#else
|
||||
/* store the integer part of y in mm2:mm3 */
|
||||
xmm3 = _mm_movehl_ps(xmm3, y);
|
||||
mm2 = _mm_cvttps_pi32(y);
|
||||
mm3 = _mm_cvttps_pi32(xmm3);
|
||||
|
||||
/* j=(j+1) & (~1) (see the cephes sources) */
|
||||
mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
|
||||
mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
|
||||
|
||||
y = _mm_cvtpi32x2_ps(mm2, mm3);
|
||||
|
||||
mm4 = mm2;
|
||||
mm5 = mm3;
|
||||
|
||||
/* get the swap sign flag for the sine */
|
||||
mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
|
||||
mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
|
||||
mm0 = _mm_slli_pi32(mm0, 29);
|
||||
mm1 = _mm_slli_pi32(mm1, 29);
|
||||
v4sf swap_sign_bit_sin;
|
||||
COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
|
||||
|
||||
/* get the polynom selection mask for the sine */
|
||||
|
||||
mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
|
||||
mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
|
||||
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
|
||||
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
|
||||
v4sf poly_mask;
|
||||
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
|
||||
#endif
|
||||
|
||||
/* The magic pass: "Extended precision modular arithmetic"
|
||||
x = ((x - y * DP1) - y * DP2) - y * DP3; */
|
||||
xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
|
||||
xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
|
||||
xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
|
||||
xmm1 = _mm_mul_ps(y, xmm1);
|
||||
xmm2 = _mm_mul_ps(y, xmm2);
|
||||
xmm3 = _mm_mul_ps(y, xmm3);
|
||||
x = _mm_add_ps(x, xmm1);
|
||||
x = _mm_add_ps(x, xmm2);
|
||||
x = _mm_add_ps(x, xmm3);
|
||||
|
||||
#ifdef USE_SSE2
|
||||
emm4 = _mm_sub_epi32(emm4, *(v4si*)_pi32_2);
|
||||
emm4 = _mm_andnot_si128(emm4, *(v4si*)_pi32_4);
|
||||
emm4 = _mm_slli_epi32(emm4, 29);
|
||||
v4sf sign_bit_cos = _mm_castsi128_ps(emm4);
|
||||
#else
|
||||
/* get the sign flag for the cosine */
|
||||
mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
|
||||
mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
|
||||
mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
|
||||
mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
|
||||
mm4 = _mm_slli_pi32(mm4, 29);
|
||||
mm5 = _mm_slli_pi32(mm5, 29);
|
||||
v4sf sign_bit_cos;
|
||||
COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
|
||||
_mm_empty(); /* good-bye mmx */
|
||||
#endif
|
||||
|
||||
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
|
||||
|
||||
|
||||
/* Evaluate the first polynom (0 <= x <= Pi/4) */
|
||||
v4sf z = _mm_mul_ps(x,x);
|
||||
y = *(v4sf*)_ps_coscof_p0;
|
||||
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
|
||||
y = _mm_mul_ps(y, z);
|
||||
y = _mm_mul_ps(y, z);
|
||||
v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
|
||||
y = _mm_sub_ps(y, tmp);
|
||||
y = _mm_add_ps(y, *(v4sf*)_ps_1);
|
||||
|
||||
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
|
||||
|
||||
v4sf y2 = *(v4sf*)_ps_sincof_p0;
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
|
||||
y2 = _mm_mul_ps(y2, z);
|
||||
y2 = _mm_mul_ps(y2, x);
|
||||
y2 = _mm_add_ps(y2, x);
|
||||
|
||||
/* select the correct result from the two polynoms */
|
||||
xmm3 = poly_mask;
|
||||
v4sf ysin2 = _mm_and_ps(xmm3, y2);
|
||||
v4sf ysin1 = _mm_andnot_ps(xmm3, y);
|
||||
y2 = _mm_sub_ps(y2,ysin2);
|
||||
y = _mm_sub_ps(y, ysin1);
|
||||
|
||||
xmm1 = _mm_add_ps(ysin1,ysin2);
|
||||
xmm2 = _mm_add_ps(y,y2);
|
||||
|
||||
/* update the sign */
|
||||
*s = _mm_xor_ps(xmm1, sign_bit_sin);
|
||||
*c = _mm_xor_ps(xmm2, sign_bit_cos);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
35
src/ext/speex/COPYING
Normal file
35
src/ext/speex/COPYING
Normal file
@@ -0,0 +1,35 @@
|
||||
Copyright 2002-2007 Xiph.org Foundation
|
||||
Copyright 2002-2007 Jean-Marc Valin
|
||||
Copyright 2005-2007 Analog Devices Inc.
|
||||
Copyright 2005-2007 Commonwealth Scientific and Industrial Research
|
||||
Organisation (CSIRO)
|
||||
Copyright 1993, 2002, 2006 David Rowe
|
||||
Copyright 2003 EpicGames
|
||||
Copyright 1992-1994 Jutta Degener, Carsten Bormann
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
- Neither the name of the Xiph.org Foundation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
1265
src/ext/speex/resample.c
Normal file
1265
src/ext/speex/resample.c
Normal file
File diff suppressed because it is too large
Load Diff
301
src/ext/speex/speex_resampler.h
Normal file
301
src/ext/speex/speex_resampler.h
Normal file
@@ -0,0 +1,301 @@
|
||||
/* Copyright (C) 2007 Jean-Marc Valin
|
||||
|
||||
File: speex_resampler.h
|
||||
Resampling code
|
||||
|
||||
The design goals of this code are:
|
||||
- Very fast algorithm
|
||||
- Low memory requirement
|
||||
- Good *perceptual* quality (and not best SNR)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
||||
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SPEEX_RESAMPLER_H
|
||||
#define SPEEX_RESAMPLER_H
|
||||
|
||||
/********* WARNING: MENTAL SANITY ENDS HERE *************/
|
||||
|
||||
/* If the resampler is defined outside of Speex, we change the symbol
|
||||
names so that there won't be any clash if linking with Speex later
|
||||
on. */
|
||||
|
||||
#define RANDOM_PREFIX rubberband
|
||||
|
||||
#ifndef RANDOM_PREFIX
|
||||
#error "Please define RANDOM_PREFIX (above) to something specific to your project to prevent symbol name clashes"
|
||||
#endif
|
||||
|
||||
#define CAT_PREFIX2(a,b) a ## b
|
||||
#define CAT_PREFIX(a,b) CAT_PREFIX2(a, b)
|
||||
|
||||
#define speex_resampler_init CAT_PREFIX(RANDOM_PREFIX,_resampler_init)
|
||||
#define speex_resampler_init_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_init_frac)
|
||||
#define speex_resampler_destroy CAT_PREFIX(RANDOM_PREFIX,_resampler_destroy)
|
||||
#define speex_resampler_process_float CAT_PREFIX(RANDOM_PREFIX,_resampler_process_float)
|
||||
#define speex_resampler_process_int CAT_PREFIX(RANDOM_PREFIX,_resampler_process_int)
|
||||
#define speex_resampler_process_interleaved_float CAT_PREFIX(RANDOM_PREFIX,_resampler_process_interleaved_float)
|
||||
#define speex_resampler_process_interleaved_int CAT_PREFIX(RANDOM_PREFIX,_resampler_process_interleaved_int)
|
||||
#define speex_resampler_set_rate CAT_PREFIX(RANDOM_PREFIX,_resampler_set_rate)
|
||||
#define speex_resampler_get_rate CAT_PREFIX(RANDOM_PREFIX,_resampler_get_rate)
|
||||
#define speex_resampler_set_rate_frac CAT_PREFIX(RANDOM_PREFIX,_resampler_set_rate_frac)
|
||||
#define speex_resampler_get_ratio CAT_PREFIX(RANDOM_PREFIX,_resampler_get_ratio)
|
||||
#define speex_resampler_set_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_set_quality)
|
||||
#define speex_resampler_get_quality CAT_PREFIX(RANDOM_PREFIX,_resampler_get_quality)
|
||||
#define speex_resampler_set_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_input_stride)
|
||||
#define speex_resampler_get_input_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_stride)
|
||||
#define speex_resampler_set_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_set_output_stride)
|
||||
#define speex_resampler_get_output_stride CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_stride)
|
||||
#define speex_resampler_get_input_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_input_latency)
|
||||
#define speex_resampler_get_output_latency CAT_PREFIX(RANDOM_PREFIX,_resampler_get_output_latency)
|
||||
#define speex_resampler_skip_zeros CAT_PREFIX(RANDOM_PREFIX,_resampler_skip_zeros)
|
||||
#define speex_resampler_reset_mem CAT_PREFIX(RANDOM_PREFIX,_resampler_reset_mem)
|
||||
#define speex_resampler_strerror CAT_PREFIX(RANDOM_PREFIX,_resampler_strerror)
|
||||
|
||||
#define spx_int16_t short
|
||||
#define spx_int32_t int
|
||||
#define spx_uint16_t unsigned short
|
||||
#define spx_uint32_t unsigned int
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define SPEEX_RESAMPLER_QUALITY_MAX 10
|
||||
#define SPEEX_RESAMPLER_QUALITY_MIN 0
|
||||
#define SPEEX_RESAMPLER_QUALITY_DEFAULT 4
|
||||
#define SPEEX_RESAMPLER_QUALITY_VOIP 3
|
||||
#define SPEEX_RESAMPLER_QUALITY_DESKTOP 5
|
||||
|
||||
enum {
|
||||
RESAMPLER_ERR_SUCCESS = 0,
|
||||
RESAMPLER_ERR_ALLOC_FAILED = 1,
|
||||
RESAMPLER_ERR_BAD_STATE = 2,
|
||||
RESAMPLER_ERR_INVALID_ARG = 3,
|
||||
RESAMPLER_ERR_PTR_OVERLAP = 4,
|
||||
|
||||
RESAMPLER_ERR_MAX_ERROR
|
||||
};
|
||||
|
||||
struct SpeexResamplerState_;
|
||||
typedef struct SpeexResamplerState_ SpeexResamplerState;
|
||||
|
||||
/** Create a new resampler with integer input and output rates.
|
||||
* @param nb_channels Number of channels to be processed
|
||||
* @param in_rate Input sampling rate (integer number of Hz).
|
||||
* @param out_rate Output sampling rate (integer number of Hz).
|
||||
* @param quality Resampling quality between 0 and 10, where 0 has poor quality
|
||||
* and 10 has very high quality.
|
||||
* @return Newly created resampler state
|
||||
* @retval NULL Error: not enough memory
|
||||
*/
|
||||
SpeexResamplerState *speex_resampler_init(spx_uint32_t nb_channels,
|
||||
spx_uint32_t in_rate,
|
||||
spx_uint32_t out_rate,
|
||||
int quality,
|
||||
int *err);
|
||||
|
||||
/** Create a new resampler with fractional input/output rates. The sampling
|
||||
* rate ratio is an arbitrary rational number with both the numerator and
|
||||
* denominator being 32-bit integers.
|
||||
* @param nb_channels Number of channels to be processed
|
||||
* @param ratio_num Numerator of the sampling rate ratio
|
||||
* @param ratio_den Denominator of the sampling rate ratio
|
||||
* @param in_rate Input sampling rate rounded to the nearest integer (in Hz).
|
||||
* @param out_rate Output sampling rate rounded to the nearest integer (in Hz).
|
||||
* @param quality Resampling quality between 0 and 10, where 0 has poor quality
|
||||
* and 10 has very high quality.
|
||||
* @return Newly created resampler state
|
||||
* @retval NULL Error: not enough memory
|
||||
*/
|
||||
SpeexResamplerState *speex_resampler_init_frac(spx_uint32_t nb_channels,
|
||||
spx_uint32_t ratio_num,
|
||||
spx_uint32_t ratio_den,
|
||||
spx_uint32_t in_rate,
|
||||
spx_uint32_t out_rate,
|
||||
int quality,
|
||||
int *err);
|
||||
|
||||
/** Destroy a resampler state.
|
||||
* @param st Resampler state
|
||||
*/
|
||||
void speex_resampler_destroy(SpeexResamplerState *st);
|
||||
|
||||
/** Resample a float array. The input and output buffers must *not* overlap.
|
||||
* @param st Resampler state
|
||||
* @param channel_index Index of the channel to process for the multi-channel
|
||||
* base (0 otherwise)
|
||||
* @param in Input buffer
|
||||
* @param in_len Number of input samples in the input buffer. Returns the
|
||||
* number of samples processed
|
||||
* @param out Output buffer
|
||||
* @param out_len Size of the output buffer. Returns the number of samples written
|
||||
*/
|
||||
int speex_resampler_process_float(SpeexResamplerState *st,
|
||||
spx_uint32_t channel_index,
|
||||
const float *in,
|
||||
spx_uint32_t *in_len,
|
||||
float *out,
|
||||
spx_uint32_t *out_len);
|
||||
|
||||
/** Resample an interleaved float array. The input and output buffers must *not* overlap.
|
||||
* @param st Resampler state
|
||||
* @param in Input buffer
|
||||
* @param in_len Number of input samples in the input buffer. Returns the number
|
||||
* of samples processed. This is all per-channel.
|
||||
* @param out Output buffer
|
||||
* @param out_len Size of the output buffer. Returns the number of samples written.
|
||||
* This is all per-channel.
|
||||
*/
|
||||
int speex_resampler_process_interleaved_float(SpeexResamplerState *st,
|
||||
const float *in,
|
||||
spx_uint32_t *in_len,
|
||||
float *out,
|
||||
spx_uint32_t *out_len);
|
||||
|
||||
/** Set (change) the input/output sampling rates (integer value).
|
||||
* @param st Resampler state
|
||||
* @param in_rate Input sampling rate (integer number of Hz).
|
||||
* @param out_rate Output sampling rate (integer number of Hz).
|
||||
*/
|
||||
int speex_resampler_set_rate(SpeexResamplerState *st,
|
||||
spx_uint32_t in_rate,
|
||||
spx_uint32_t out_rate);
|
||||
|
||||
/** Get the current input/output sampling rates (integer value).
|
||||
* @param st Resampler state
|
||||
* @param in_rate Input sampling rate (integer number of Hz) copied.
|
||||
* @param out_rate Output sampling rate (integer number of Hz) copied.
|
||||
*/
|
||||
void speex_resampler_get_rate(SpeexResamplerState *st,
|
||||
spx_uint32_t *in_rate,
|
||||
spx_uint32_t *out_rate);
|
||||
|
||||
/** Set (change) the input/output sampling rates and resampling ratio
|
||||
* (fractional values in Hz supported).
|
||||
* @param st Resampler state
|
||||
* @param ratio_num Numerator of the sampling rate ratio
|
||||
* @param ratio_den Denominator of the sampling rate ratio
|
||||
* @param in_rate Input sampling rate rounded to the nearest integer (in Hz).
|
||||
* @param out_rate Output sampling rate rounded to the nearest integer (in Hz).
|
||||
*/
|
||||
int speex_resampler_set_rate_frac(SpeexResamplerState *st,
|
||||
spx_uint32_t ratio_num,
|
||||
spx_uint32_t ratio_den,
|
||||
spx_uint32_t in_rate,
|
||||
spx_uint32_t out_rate);
|
||||
|
||||
/** Get the current resampling ratio. This will be reduced to the least
|
||||
* common denominator.
|
||||
* @param st Resampler state
|
||||
* @param ratio_num Numerator of the sampling rate ratio copied
|
||||
* @param ratio_den Denominator of the sampling rate ratio copied
|
||||
*/
|
||||
void speex_resampler_get_ratio(SpeexResamplerState *st,
|
||||
spx_uint32_t *ratio_num,
|
||||
spx_uint32_t *ratio_den);
|
||||
|
||||
/** Set (change) the conversion quality.
|
||||
* @param st Resampler state
|
||||
* @param quality Resampling quality between 0 and 10, where 0 has poor
|
||||
* quality and 10 has very high quality.
|
||||
*/
|
||||
int speex_resampler_set_quality(SpeexResamplerState *st,
|
||||
int quality);
|
||||
|
||||
/** Get the conversion quality.
|
||||
* @param st Resampler state
|
||||
* @param quality Resampling quality between 0 and 10, where 0 has poor
|
||||
* quality and 10 has very high quality.
|
||||
*/
|
||||
void speex_resampler_get_quality(SpeexResamplerState *st,
|
||||
int *quality);
|
||||
|
||||
/** Set (change) the input stride.
|
||||
* @param st Resampler state
|
||||
* @param stride Input stride
|
||||
*/
|
||||
void speex_resampler_set_input_stride(SpeexResamplerState *st,
|
||||
spx_uint32_t stride);
|
||||
|
||||
/** Get the input stride.
|
||||
* @param st Resampler state
|
||||
* @param stride Input stride copied
|
||||
*/
|
||||
void speex_resampler_get_input_stride(SpeexResamplerState *st,
|
||||
spx_uint32_t *stride);
|
||||
|
||||
/** Set (change) the output stride.
|
||||
* @param st Resampler state
|
||||
* @param stride Output stride
|
||||
*/
|
||||
void speex_resampler_set_output_stride(SpeexResamplerState *st,
|
||||
spx_uint32_t stride);
|
||||
|
||||
/** Get the output stride.
|
||||
* @param st Resampler state copied
|
||||
* @param stride Output stride
|
||||
*/
|
||||
void speex_resampler_get_output_stride(SpeexResamplerState *st,
|
||||
spx_uint32_t *stride);
|
||||
|
||||
/** Get the latency in input samples introduced by the resampler.
|
||||
* @param st Resampler state
|
||||
*/
|
||||
int speex_resampler_get_input_latency(SpeexResamplerState *st);
|
||||
|
||||
/** Get the latency in output samples introduced by the resampler.
|
||||
* @param st Resampler state
|
||||
*/
|
||||
int speex_resampler_get_output_latency(SpeexResamplerState *st);
|
||||
|
||||
/** Make sure that the first samples to go out of the resamplers don't have
|
||||
* leading zeros. This is only useful before starting to use a newly created
|
||||
* resampler. It is recommended to use that when resampling an audio file, as
|
||||
* it will generate a file with the same length. For real-time processing,
|
||||
* it is probably easier not to use this call (so that the output duration
|
||||
* is the same for the first frame).
|
||||
* @param st Resampler state
|
||||
*/
|
||||
int speex_resampler_skip_zeros(SpeexResamplerState *st);
|
||||
|
||||
/** Reset a resampler so a new (unrelated) stream can be processed.
|
||||
* @param st Resampler state
|
||||
*/
|
||||
int speex_resampler_reset_mem(SpeexResamplerState *st);
|
||||
|
||||
/** Returns the English meaning for an error code
|
||||
* @param err Error code
|
||||
* @return English string
|
||||
*/
|
||||
const char *speex_resampler_strerror(int err);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user