Merge from branch performance

This commit is contained in:
Chris Cannam
2022-09-02 16:35:46 +01:00
33 changed files with 1391 additions and 548 deletions

View File

@@ -239,59 +239,67 @@ resampler or libsamplerate.
### FFT libraries supported
The choice of FFT library makes no difference to output quality, only
to CPU usage.
```
Library Build option CPP define Notes
---- ------------ ---------- -----
Built-in -Dfft=builtin -DUSE_BUILTIN_FFT
Default except on macOS/iOS.
Can be distributed with either
the Rubber Band GPL or
commercial licence.
Built-in -Dfft=builtin -DUSE_BUILTIN_FFT Default except on macOS/iOS.
Accelerate -Dfft=vdsp -DHAVE_VDSP Default on macOS/iOS.
Best option on these platforms.
FFTW3 -Dfft=fftw -DHAVE_FFTW3 GPL.
A bit faster than built-in,
FFTW3 -Dfft=fftw -DHAVE_FFTW3 A bit faster than built-in,
a bit slower than Accelerate.
GPL licence.
KissFFT -Dfft=kissfft -DHAVE_KISSFFT
Single precision.
Only indicated for use with
SLEEF -Dfft=sleef -DHAVE_SLEEF Usually very fast. Not as widely
distributed as FFTW3. Requires
both libsleef and libsleefdft.
BSD-ish licence.
KissFFT -Dfft=kissfft -DHAVE_KISSFFT Single precision.
Only advisable when using
single-precision sample type
(see below).
Bundled, can be distributed with
either the Rubber Band GPL or
commercial licence.
BSD-ish licence.
Intel IPP -Dfft=ipp -DHAVE_IPP Proprietary, can only be used with
Intel IPP -Dfft=ipp -DHAVE_IPP Very fast on Intel hardware.
Proprietary, can only be used with
Rubber Band commercial licence.
```
### Resampler libraries supported
The choice of resampler affects both output quality, when
pitch-shifting, and CPU usage.
```
Library Build option CPP define Notes
---- ------------ ---------- -----
------- ------------ ---------- -----
Built-in -Dresampler=builtin -DUSE_BQRESAMPLER
Default.
Can be distributed with either
the Rubber Band GPL or
commercial licence. Intended to
give best quality for time-varying
pitch shifts in real-time mode.
Newer than, and not as well-tested
as, libsamplerate.
Built-in -Dresampler=builtin -DUSE_BQRESAMPLER Default.
Intended to give high quality
for time-varying pitch shifts
in real-time mode.
Not the fastest option.
libsamplerate -DHAVE_LIBSAMPLERATE
-Dresampler=libsamplerate Good choice in most cases.
libsamplerate -Dresampler=libsamplerate -DHAVE_LIBSAMPLERATE Good choice in most cases.
High quality and usually a bit
faster than the built-in option.
BSD-ish licence.
Speex -DUSE_SPEEX
-Dresampler=speex Can be distributed with
either the Rubber Band GPL or
commercial licence.
libspeexdsp -Dresampler=libspeexdsp -DHAVE_LIBSPEEXDSP Very fast.
May not be artifact-free for
time-varying pitch shifts.
BSD-ish licence.
Bundled Speex -Dresampler=speex -DUSE_SPEEX Older Speex code, bundled for
compatibility with some existing
projects.
Avoid for new projects.
```
## 8. Other supported #defines

View File

@@ -58,6 +58,7 @@ details.
* FFTW3 - GPL; proprietary licence needed for redistribution
* Intel IPP - Proprietary; licence needed for redistribution
* SLEEF - BSD-like
* KissFFT - BSD-like
* libsamplerate - BSD-like from version 0.1.9 onwards
* Speex - BSD-like

View File

@@ -418,7 +418,11 @@ int main(int argc, char **argv)
if (!quiet) {
if (finer) {
if (shortwin) {
cerr << "Using intermediate R3 (finer) single-windowed engine" << endl;
} else {
cerr << "Using R3 (finer) engine" << endl;
}
} else {
cerr << "Using R2 (faster) engine" << endl;
cerr << "Using crispness level: " << crispness << " (";
@@ -669,6 +673,7 @@ int main(int argc, char **argv)
RubberBandStretcher ts(sfinfo.samplerate, channels, options,
ratio, frequencyshift);
ts.setExpectedInputDuration(sfinfo.frames);
ts.setMaxProcessSize(bs);
int frame = 0;
int percent = 0;

View File

@@ -2,7 +2,7 @@
project(
'Rubber Band Library',
'c', 'cpp',
version: '3.0.0',
version: '3.1.0-pre',
license: 'GPL-2.0-or-later',
default_options: [
'cpp_std=c++11',
@@ -15,7 +15,7 @@ project(
meson_version: '>= 0.53.0'
)
rubberband_dynamic_library_version = '2.2.0'
rubberband_dynamic_library_version = '2.2.1'
system = host_machine.system()
architecture = host_machine.cpu_family()
@@ -49,6 +49,7 @@ library_sources = [
'src/common/Resampler.cpp',
'src/common/StretchCalculator.cpp',
'src/common/sysutils.cpp',
'src/common/mathmisc.cpp',
'src/common/Thread.cpp',
'src/finer/R3Stretcher.cpp',
]
@@ -112,7 +113,10 @@ foreach d: get_option('extra_include_dirs')
endforeach
fftw3_dep = dependency('fftw3', version: '>= 3.0.0', required: false)
sleef_dep = dependency('sleef', version: '>= 3.3.0', required: false)
sleefdft_dep = dependency('sleefdft', version: '>= 3.3.0', required: false)
samplerate_dep = dependency('samplerate', version: '>= 0.1.8', required: false)
speexdsp_dep = dependency('speexdsp', version: '>= 1.0.0', required: false)
sndfile_dep = dependency('sndfile', version: '>= 1.0.16', required: false)
vamp_dep = dependency('vamp-sdk', version: '>= 2.9', required: false)
boost_unit_test_dep = dependency('boost', modules: ['unit_test_framework'], version: '>= 1.73', required: false)
@@ -164,6 +168,9 @@ if fft == 'builtin'
if fftw3_dep.found()
message('(to use FFTW instead, reconfigure with -Dfft=fftw)')
endif
if sleef_dep.found()
message('(to use SLEEF instead, reconfigure with -Dfft=sleef)')
endif
feature_defines += ['-DUSE_BUILTIN_FFT']
elif fft == 'kissfft'
@@ -172,25 +179,53 @@ elif fft == 'kissfft'
if fftw3_dep.found()
message('(to use FFTW instead, reconfigure with -Dfft=fftw)')
endif
if sleef_dep.found()
message('(to use SLEEF instead, reconfigure with -Dfft=sleef)')
endif
feature_sources += ['src/ext/kissfft/kiss_fft.c', 'src/ext/kissfft/kiss_fftr.c']
feature_defines += ['-DHAVE_KISSFFT']
general_include_dirs += 'src/ext/kissfft'
elif fft == 'fftw'
if fftw3_dep.found()
config_summary += { 'FFT': 'FFTW' }
message('For FFT: using FFTW')
pkgconfig_requirements += fftw3_dep
else
fftw_dep = cpp.find_library('fftw3',
if not fftw3_dep.found()
fftw3_dep = cpp.find_library('fftw3',
dirs: get_option('extra_lib_dirs'),
has_headers: ['fftw3.h'],
header_args: extra_include_args,
required: true)
endif
config_summary += { 'FFT': 'FFTW' }
message('For FFT: using FFTW')
if sleef_dep.found()
message('(to use SLEEF instead, reconfigure with -Dfft=sleef)')
endif
pkgconfig_requirements += fftw3_dep
feature_dependencies += fftw3_dep
feature_defines += ['-DHAVE_FFTW3', '-DFFTW_DOUBLE_ONLY']
elif fft == 'sleef'
if sleefdft_dep.found() and sleef_dep.found()
config_summary += { 'FFT': 'SLEEF' }
message('For FFT: using SLEEF')
pkgconfig_requirements += sleefdft_dep
pkgconfig_requirements += sleef_dep
else
sleefdft_dep = cpp.find_library('sleefdft',
dirs: get_option('extra_lib_dirs'),
has_headers: ['sleefdft.h'],
header_args: extra_include_args,
required: true)
sleef_dep = cpp.find_library('sleef',
dirs: get_option('extra_lib_dirs'),
has_headers: ['sleef.h'],
header_args: extra_include_args,
required: true)
config_summary += { 'FFT': 'SLEEF' }
endif
feature_dependencies += sleefdft_dep
feature_dependencies += sleef_dep
feature_defines += ['-DHAVE_SLEEF']
elif fft == 'vdsp'
config_summary += { 'FFT': 'vDSP' }
message('For FFT: using vDSP')
@@ -223,27 +258,41 @@ if resampler == 'builtin'
feature_defines += ['-DUSE_BQRESAMPLER']
elif resampler == 'libsamplerate'
if samplerate_dep.found()
config_summary += { 'Resampler': 'libsamplerate' }
message('For resampler: using libsamplerate')
pkgconfig_requirements += samplerate_dep
else
if not samplerate_dep.found()
samplerate_dep = cpp.find_library('samplerate',
dirs: get_option('extra_lib_dirs'),
has_headers: ['samplerate.h'],
header_args: extra_include_args,
required: true)
endif
config_summary += { 'Resampler': 'libsamplerate' }
message('For resampler: using libsamplerate')
feature_dependencies += samplerate_dep
pkgconfig_requirements += samplerate_dep
feature_defines += ['-DHAVE_LIBSAMPLERATE']
elif resampler == 'speex'
config_summary += { 'Resampler': 'Speex' }
message('For resampler: using Speex')
message('For resampler: using bundled Speex')
message('(consider libsamplerate if time-varying pitch shift is required)')
feature_sources += ['src/ext/speex/resample.c']
feature_defines += ['-DUSE_SPEEX']
elif resampler == 'libspeexdsp'
if not speexdsp_dep.found()
speexdsp_dep = cpp.find_library('speexdsp',
dirs: get_option('extra_lib_dirs'),
has_headers: ['speex/speex_resampler.h'],
header_args: extra_include_args,
required: true)
endif
config_summary += { 'Resampler': 'Speex DSP' }
message('For resampler: using Speex DSP library')
message('(consider libsamplerate if time-varying pitch shift is required)')
feature_dependencies += speexdsp_dep
pkgconfig_requirements += speexdsp_dep
feature_defines += ['-DHAVE_LIBSPEEXDSP']
elif resampler == 'ipp'
if ipp_path != ''
config_summary += { 'Resampler': 'Intel IPP' }

View File

@@ -1,13 +1,13 @@
option('fft',
type: 'combo',
choices: ['auto', 'builtin', 'kissfft', 'fftw', 'vdsp', 'ipp'],
choices: ['auto', 'builtin', 'kissfft', 'fftw', 'sleef', 'vdsp', 'ipp'],
value: 'auto',
description: 'FFT library to use. The default (auto) will use vDSP if available, the builtin implementation otherwise.')
option('resampler',
type: 'combo',
choices: ['auto', 'builtin', 'libsamplerate', 'speex', 'ipp'],
choices: ['auto', 'builtin', 'libsamplerate', 'speex', 'libspeexdsp', 'ipp'],
value: 'auto',
description: 'Resampler library to use. The default (auto) simply uses the builtin implementation.')

View File

@@ -28,6 +28,7 @@ RUBBERBAND_SRC_FILES := \
$(RUBBERBAND_SRC_PATH)/common/Allocators.cpp \
$(RUBBERBAND_SRC_PATH)/common/StretchCalculator.cpp \
$(RUBBERBAND_SRC_PATH)/common/sysutils.cpp \
$(RUBBERBAND_SRC_PATH)/common/mathmisc.cpp \
$(RUBBERBAND_SRC_PATH)/common/Thread.cpp \
$(RUBBERBAND_SRC_PATH)/finer/R3StretcherImpl.cpp

View File

@@ -55,6 +55,7 @@ LIBRARY_SOURCES := \
src/common/Resampler.cpp \
src/common/StretchCalculator.cpp \
src/common/sysutils.cpp \
src/common/mathmisc.cpp \
src/common/Thread.cpp \
src/finer/R3Stretcher.cpp

View File

@@ -44,6 +44,7 @@ LIBRARY_SOURCES := \
src/common/Resampler.cpp \
src/common/StretchCalculator.cpp \
src/common/sysutils.cpp \
src/common/mathmisc.cpp \
src/common/Thread.cpp \
src/finer/R3Stretcher.cpp

View File

@@ -44,6 +44,7 @@ LIBRARY_SOURCES := \
src/common/Resampler.cpp \
src/common/StretchCalculator.cpp \
src/common/sysutils.cpp \
src/common/mathmisc.cpp \
src/common/Thread.cpp \
src/finer/R3Stretcher.cpp

View File

@@ -44,6 +44,7 @@ LIBRARY_SOURCES := \
src/common/Resampler.cpp \
src/common/StretchCalculator.cpp \
src/common/sysutils.cpp \
src/common/mathmisc.cpp \
src/common/Thread.cpp \
src/finer/R3Stretcher.cpp

View File

@@ -157,6 +157,7 @@
<ClCompile Include="..\src\common\Allocators.cpp" />
<ClCompile Include="..\src\common\StretchCalculator.cpp" />
<ClCompile Include="..\src\common\sysutils.cpp" />
<ClCompile Include="..\src\common\mathmisc.cpp" />
<ClCompile Include="..\src\common\Thread.cpp" />
<ClCompile Include="..\src\finer\R3Stretcher.cpp" />
</ItemGroup>

View File

@@ -24,7 +24,7 @@
#ifndef RUBBERBAND_STRETCHER_H
#define RUBBERBAND_STRETCHER_H
#define RUBBERBAND_VERSION "3.0.0"
#define RUBBERBAND_VERSION "3.1.0"
#define RUBBERBAND_API_MAJOR_VERSION 2
#define RUBBERBAND_API_MINOR_VERSION 7
@@ -262,26 +262,32 @@ public:
* situation where \c OptionThreadingAuto would do so, except omit
* the check for multiple CPUs and instead assume it to be true.
*
* 7. Flags prefixed \c OptionWindow control the window size for
* FFT processing in the R2 engine. (The window size actually
* used will depend on many factors, but it can be influenced.)
* These options currently have no effect when using the R3
* engine, but they may do in the future - so code written to use
* R3 now is recommended to use the default. These options may
* 7. Flags prefixed \c OptionWindow influence the window size for
* FFT processing. In the R2 engine these affect the resulting
* sound quality but have relatively little effect on processing
* speed. With the R3 engine they can dramatically affect
* processing speed as well as output quality. These options may
* not be changed after construction.
*
* \li \c OptionWindowStandard - Use the default window size.
* The actual size will vary depending on other parameters.
* This option is expected to produce better results than the
* other window options in most situations.
* other window options in most situations. In the R3 engine
* this causes the engine's full multi-resolution processing
* scheme to be used.
*
* \li \c OptionWindowShort - Use a shorter window. This may
* result in crisper sound for audio that depends strongly on
* its timing qualities.
* \li \c OptionWindowShort - Use a shorter window. With the R2
* engine this may result in crisper sound for audio that
* depends strongly on its timing qualities. With the R3 engine,
* this causes the engine to be restricted to a single window
* size, resulting in both dramatically faster processing and
* lower latency than OptionWindowStandard, but at the expense
* of some sound quality.
*
* \li \c OptionWindowLong - Use a longer window. This is
* likely to result in a smoother sound at the expense of
* clarity and timing.
* \li \c OptionWindowLong - Use a longer window. With the R2
* engine this is likely to result in a smoother sound at the
* expense of clarity and timing. The R3 engine currently
* ignores this option, treating it like OptionWindowStandard.
*
* 8. Flags prefixed \c OptionSmoothing control the use of
* window-presum FFT and time-domain smoothing in the R2
@@ -331,10 +337,10 @@ public:
* \li \c OptionPitchHighConsistency - Use a method that
* supports dynamic pitch changes without discontinuities,
* including when crossing the 1.0 pitch scale. This may cost
* more in CPU than the other two options, especially when the
* pitch scale is exactly 1.0. You should use this option
* whenever you wish to support dynamically changing pitch
* shifts during processing.
* more in CPU than the default, especially when the pitch scale
* is exactly 1.0. You should use this option whenever you wish
* to support dynamically changing pitch shift during
* processing.
*
* 11. Flags prefixed \c OptionChannels control the method used
* for processing two-channel stereo audio. These have different,

View File

@@ -28,7 +28,7 @@
extern "C" {
#endif
#define RUBBERBAND_VERSION "3.0.0"
#define RUBBERBAND_VERSION "3.1.0"
#define RUBBERBAND_API_MAJOR_VERSION 2
#define RUBBERBAND_API_MINOR_VERSION 7

View File

@@ -44,6 +44,8 @@
standalone library.
*/
#ifndef ALREADY_CONFIGURED
#define USE_BQRESAMPLER 1
#define NO_TIMING 1
@@ -56,6 +58,8 @@
#define USE_BUILTIN_FFT 1
#endif
#endif
#include "../src/faster/AudioCurveCalculator.cpp"
#include "../src/faster/CompoundAudioCurve.cpp"
#include "../src/faster/HighFrequencyAudioCurve.cpp"
@@ -69,6 +73,7 @@
#include "../src/common/Allocators.cpp"
#include "../src/common/StretchCalculator.cpp"
#include "../src/common/sysutils.cpp"
#include "../src/common/mathmisc.cpp"
#include "../src/common/Thread.cpp"
#include "../src/faster/StretcherChannelData.cpp"
#include "../src/faster/R2Stretcher.cpp"

View File

@@ -85,9 +85,9 @@ T *allocate(size_t count)
#else /* !MALLOC_IS_ALIGNED */
// That's the "sufficiently aligned" functions dealt with, the
// rest need a specific alignment provided to the call. 32-byte
// alignment is required for at least OpenMAX
static const int alignment = 32;
// rest need a specific alignment provided to the call. 64-byte
// alignment is enough for 8x8 double operations
static const int alignment = 64;
#ifdef HAVE__ALIGNED_MALLOC
ptr = _aligned_malloc(count * sizeof(T), alignment);

View File

@@ -31,6 +31,8 @@
#include "Allocators.h"
#include "VectorOps.h"
#include "mathmisc.h"
#define BQ_R__ R__
using std::vector;
@@ -122,6 +124,7 @@ BQResampler::QualityParams::QualityParams(Quality q)
k_snr = 70.0;
k_transition = 0.2;
cut = 0.9;
rational_max = 48000;
break;
case FastestTolerable:
p_multiple = 62;
@@ -129,6 +132,7 @@ BQResampler::QualityParams::QualityParams(Quality q)
k_snr = 90.0;
k_transition = 0.05;
cut = 0.975;
rational_max = 96000;
break;
case Best:
p_multiple = 122;
@@ -136,6 +140,7 @@ BQResampler::QualityParams::QualityParams(Quality q)
k_snr = 100.0;
k_transition = 0.01;
cut = 0.995;
rational_max = 192000;
break;
}
}
@@ -373,36 +378,18 @@ BQResampler::fill_params(double ratio, double numd, double denomd) const
BQResampler::params
BQResampler::pick_params(double ratio) const
{
// Farey algorithm, see
// https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/
int max_denom = 192000;
double a = 0.0, b = 1.0, c = 1.0, d = 0.0;
double pa = a, pb = b, pc = c, pd = d;
double eps = 1e-9;
while (b <= max_denom && d <= max_denom) {
double mediant = (a + c) / (b + d);
if (fabs(ratio - mediant) < eps) {
if (b + d <= max_denom) {
return fill_params(ratio, a + c, b + d);
} else if (d > b) {
return fill_params(ratio, c, d);
int max_denom;
if (m_dynamism == RatioMostlyFixed) {
max_denom = 192000;
} else {
return fill_params(ratio, a, b);
max_denom = m_qparams.rational_max;
if (ratio > 1.0) {
max_denom = int(ceil(max_denom / ratio));
}
}
if (ratio > mediant) {
pa = a; pb = b;
a += c; b += d;
} else {
pc = c; pd = d;
c += a; d += b;
}
}
if (fabs(ratio - (pc / pd)) < fabs(ratio - (pa / pb))) {
return fill_params(ratio, pc, pd);
} else {
return fill_params(ratio, pa, pb);
}
int num, denom;
pickNearestRational(ratio, max_denom, num, denom);
return fill_params(ratio, num, denom);
}
void

View File

@@ -71,6 +71,7 @@ private:
double k_snr;
double k_transition;
double cut;
int rational_max;
QualityParams(Quality);
};

View File

@@ -53,6 +53,13 @@
#include <fftw3.h>
#endif
#ifdef HAVE_SLEEF
extern "C" {
#include <sleef.h>
#include <sleefdft.h>
}
#endif
#ifdef HAVE_VDSP
#include <Accelerate/Accelerate.h>
#endif
@@ -63,6 +70,7 @@
#ifndef HAVE_IPP
#ifndef HAVE_FFTW3
#ifndef HAVE_SLEEF
#ifndef HAVE_KISSFFT
#ifndef USE_BUILTIN_FFT
#ifndef HAVE_VDSP
@@ -72,6 +80,7 @@
#endif
#endif
#endif
#endif
#include <cmath>
#include <iostream>
@@ -1425,6 +1434,302 @@ pthread_mutex_t D_FFTW::m_commonMutex = PTHREAD_MUTEX_INITIALIZER;
#endif /* HAVE_FFTW3 */
#ifdef HAVE_SLEEF
class D_SLEEF : public FFTImpl
{
bool isAligned(const void *ptr) {
return ! ((uintptr_t)ptr & 63);
}
public:
D_SLEEF(int size) :
m_fplanf(0), m_fplani(0), m_fbuf(0), m_fpacked(0),
m_dplanf(0), m_dplani(0), m_dbuf(0), m_dpacked(0),
m_size(size)
{
}
~D_SLEEF() {
if (m_fplanf) {
SleefDFT_dispose(m_fplanf);
SleefDFT_dispose(m_fplani);
Sleef_free(m_fbuf);
Sleef_free(m_fpacked);
}
if (m_dplanf) {
SleefDFT_dispose(m_dplanf);
SleefDFT_dispose(m_dplani);
Sleef_free(m_dbuf);
Sleef_free(m_dpacked);
}
}
int getSize() const {
return m_size;
}
FFT::Precisions
getSupportedPrecisions() const {
return FFT::SinglePrecision | FFT::DoublePrecision;
}
void initFloat() {
if (m_fplanf) return;
m_fbuf = static_cast<float *>
(Sleef_malloc(m_size * sizeof(float)));
m_fpacked = static_cast<float *>
(Sleef_malloc((m_size + 2) * sizeof(float)));
m_fplanf = SleefDFT_float_init1d
(m_size, m_fbuf, m_fpacked,
SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
m_fplani = SleefDFT_float_init1d
(m_size, m_fpacked, m_fbuf,
SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
}
void initDouble() {
if (m_dplanf) return;
m_dbuf = static_cast<double *>
(Sleef_malloc(m_size * sizeof(double)));
m_dpacked = static_cast<double *>
(Sleef_malloc((m_size + 2) * sizeof(double)));
m_dplanf = SleefDFT_double_init1d
(m_size, m_dbuf, m_dpacked,
SLEEF_MODE_FORWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
m_dplani = SleefDFT_double_init1d
(m_size, m_dpacked, m_dbuf,
SLEEF_MODE_BACKWARD | SLEEF_MODE_REAL | SLEEF_MODE_ESTIMATE);
}
void packFloat(const float *BQ_R__ re, const float *BQ_R__ im) {
const float *src[2] = { re, im };
v_interleave(m_fpacked, src, 2, m_size/2 + 1);
}
void packDouble(const double *BQ_R__ re, const double *BQ_R__ im) {
const double *src[2] = { re, im };
v_interleave(m_dpacked, src, 2, m_size/2 + 1);
}
void unpackFloat(float *BQ_R__ re, float *BQ_R__ im) {
float *dst[2] = { re, im };
v_deinterleave(dst, m_fpacked, 2, m_size/2 + 1);
}
void unpackDouble(double *BQ_R__ re, double *BQ_R__ im) {
double *dst[2] = { re, im };
v_deinterleave(dst, m_dpacked, 2, m_size/2 + 1);
}
void forward(const double *BQ_R__ realIn, double *BQ_R__ realOut, double *BQ_R__ imagOut) {
if (!m_dplanf) initDouble();
if (isAligned(realIn)) {
SleefDFT_double_execute(m_dplanf, realIn, 0);
} else {
v_copy(m_dbuf, realIn, m_size);
SleefDFT_double_execute(m_dplanf, 0, 0);
}
unpackDouble(realOut, imagOut);
}
void forwardInterleaved(const double *BQ_R__ realIn, double *BQ_R__ complexOut) {
if (!m_dplanf) initDouble();
if (isAligned(realIn) && isAligned(complexOut)) {
SleefDFT_double_execute(m_dplanf, realIn, complexOut);
} else {
v_copy(m_dbuf, realIn, m_size);
SleefDFT_double_execute(m_dplanf, 0, 0);
v_copy(complexOut, m_dpacked, m_size + 2);
}
}
void forwardPolar(const double *BQ_R__ realIn, double *BQ_R__ magOut, double *BQ_R__ phaseOut) {
if (!m_dplanf) initDouble();
if (isAligned(realIn)) {
SleefDFT_double_execute(m_dplanf, realIn, 0);
} else {
v_copy(m_dbuf, realIn, m_size);
SleefDFT_double_execute(m_dplanf, 0, 0);
}
v_cartesian_interleaved_to_polar(magOut, phaseOut, m_dpacked, m_size/2+1);
}
void forwardMagnitude(const double *BQ_R__ realIn, double *BQ_R__ magOut) {
if (!m_dplanf) initDouble();
if (isAligned(realIn)) {
SleefDFT_double_execute(m_dplanf, realIn, 0);
} else {
v_copy(m_dbuf, realIn, m_size);
SleefDFT_double_execute(m_dplanf, 0, 0);
}
v_cartesian_interleaved_to_magnitudes(magOut, m_dpacked, m_size/2+1);
}
void forward(const float *BQ_R__ realIn, float *BQ_R__ realOut, float *BQ_R__ imagOut) {
if (!m_fplanf) initFloat();
if (isAligned(realIn)) {
SleefDFT_float_execute(m_fplanf, realIn, 0);
} else {
v_copy(m_fbuf, realIn, m_size);
SleefDFT_float_execute(m_fplanf, 0, 0);
}
unpackFloat(realOut, imagOut);
}
void forwardInterleaved(const float *BQ_R__ realIn, float *BQ_R__ complexOut) {
if (!m_fplanf) initFloat();
if (isAligned(realIn) && isAligned(complexOut)) {
SleefDFT_float_execute(m_fplanf, realIn, complexOut);
} else {
v_copy(m_fbuf, realIn, m_size);
SleefDFT_float_execute(m_fplanf, 0, 0);
v_copy(complexOut, m_fpacked, m_size + 2);
}
}
void forwardPolar(const float *BQ_R__ realIn, float *BQ_R__ magOut, float *BQ_R__ phaseOut) {
if (!m_fplanf) initFloat();
if (isAligned(realIn)) {
SleefDFT_float_execute(m_fplanf, realIn, 0);
} else {
v_copy(m_fbuf, realIn, m_size);
SleefDFT_float_execute(m_fplanf, 0, 0);
}
v_cartesian_interleaved_to_polar(magOut, phaseOut, m_fpacked, m_size/2+1);
}
void forwardMagnitude(const float *BQ_R__ realIn, float *BQ_R__ magOut) {
if (!m_fplanf) initFloat();
if (isAligned(realIn)) {
SleefDFT_float_execute(m_fplanf, realIn, 0);
} else {
v_copy(m_fbuf, realIn, m_size);
SleefDFT_float_execute(m_fplanf, 0, 0);
}
v_cartesian_interleaved_to_magnitudes(magOut, m_fpacked, m_size/2+1);
}
void inverse(const double *BQ_R__ realIn, const double *BQ_R__ imagIn, double *BQ_R__ realOut) {
if (!m_dplanf) initDouble();
packDouble(realIn, imagIn);
if (isAligned(realOut)) {
SleefDFT_double_execute(m_dplani, 0, realOut);
} else {
SleefDFT_double_execute(m_dplani, 0, 0);
v_copy(realOut, m_dbuf, m_size);
}
}
void inverseInterleaved(const double *BQ_R__ complexIn, double *BQ_R__ realOut) {
if (!m_dplanf) initDouble();
if (isAligned(complexIn) && isAligned(realOut)) {
SleefDFT_double_execute(m_dplani, complexIn, realOut);
} else {
v_copy(m_dpacked, complexIn, m_size + 2);
SleefDFT_double_execute(m_dplani, 0, 0);
v_copy(realOut, m_dbuf, m_size);
}
}
void inversePolar(const double *BQ_R__ magIn, const double *BQ_R__ phaseIn, double *BQ_R__ realOut) {
if (!m_dplanf) initDouble();
v_polar_to_cartesian_interleaved(m_dpacked, magIn, phaseIn, m_size/2+1);
if (isAligned(realOut)) {
SleefDFT_double_execute(m_dplani, 0, realOut);
} else {
SleefDFT_double_execute(m_dplani, 0, 0);
v_copy(realOut, m_dbuf, m_size);
}
}
void inverseCepstral(const double *BQ_R__ magIn, double *BQ_R__ cepOut) {
if (!m_dplanf) initDouble();
const int hs = m_size/2;
for (int i = 0; i <= hs; ++i) {
m_dpacked[i*2] = log(magIn[i] + 0.000001);
m_dpacked[i*2+1] = 0.0;
}
if (isAligned(cepOut)) {
SleefDFT_double_execute(m_dplani, 0, cepOut);
} else {
SleefDFT_double_execute(m_dplani, 0, 0);
v_copy(cepOut, m_dbuf, m_size);
}
}
void inverse(const float *BQ_R__ realIn, const float *BQ_R__ imagIn, float *BQ_R__ realOut) {
if (!m_fplanf) initFloat();
packFloat(realIn, imagIn);
if (isAligned(realOut)) {
SleefDFT_float_execute(m_dplani, 0, realOut);
} else {
SleefDFT_float_execute(m_fplani, 0, 0);
v_copy(realOut, m_fbuf, m_size);
}
}
void inverseInterleaved(const float *BQ_R__ complexIn, float *BQ_R__ realOut) {
if (!m_fplanf) initFloat();
if (isAligned(complexIn) && isAligned(realOut)) {
SleefDFT_float_execute(m_fplani, complexIn, realOut);
} else {
v_copy(m_fpacked, complexIn, m_size + 2);
SleefDFT_float_execute(m_fplani, 0, 0);
v_copy(realOut, m_fbuf, m_size);
}
}
void inversePolar(const float *BQ_R__ magIn, const float *BQ_R__ phaseIn, float *BQ_R__ realOut) {
if (!m_fplanf) initFloat();
v_polar_to_cartesian_interleaved(m_fpacked, magIn, phaseIn, m_size/2+1);
if (isAligned(realOut)) {
SleefDFT_float_execute(m_fplani, 0, realOut);
} else {
SleefDFT_float_execute(m_fplani, 0, 0);
v_copy(realOut, m_fbuf, m_size);
}
}
void inverseCepstral(const float *BQ_R__ magIn, float *BQ_R__ cepOut) {
if (!m_fplanf) initFloat();
const int hs = m_size/2;
for (int i = 0; i <= hs; ++i) {
m_fpacked[i*2] = logf(magIn[i] + 0.000001f);
m_fpacked[i*2+1] = 0.0;
}
if (isAligned(cepOut)) {
SleefDFT_float_execute(m_fplani, 0, cepOut);
} else {
SleefDFT_float_execute(m_fplani, 0, 0);
v_copy(cepOut, m_fbuf, m_size);
}
}
private:
SleefDFT *m_fplanf;
SleefDFT *m_fplani;
float *m_fbuf;
float *m_fpacked;
SleefDFT *m_dplanf;
SleefDFT *m_dplani;
double *m_dbuf;
double *m_dpacked;
const int m_size;
};
#endif /* HAVE_SLEEF */
#ifdef HAVE_KISSFFT
class D_KISSFFT : public FFTImpl
@@ -2266,6 +2571,9 @@ getImplementationDetails()
#ifdef HAVE_FFTW3
impls["fftw"] = SizeConstraintNone;
#endif
#ifdef HAVE_SLEEF
impls["sleef"] = SizeConstraintEvenPowerOfTwo;
#endif
#ifdef HAVE_KISSFFT
impls["kissfft"] = SizeConstraintEven;
#endif
@@ -2310,7 +2618,7 @@ pickImplementation(int size)
}
std::string preference[] = {
"ipp", "vdsp", "fftw", "builtin", "kissfft"
"ipp", "vdsp", "sleef", "fftw", "builtin", "kissfft"
};
for (int i = 0; i < int(sizeof(preference)/sizeof(preference[0])); ++i) {
@@ -2390,6 +2698,10 @@ FFT::FFT(int size, int debugLevel) :
} else if (impl == "fftw") {
#ifdef HAVE_FFTW3
d = new FFTs::D_FFTW(size);
#endif
} else if (impl == "sleef") {
#ifdef HAVE_SLEEF
d = new FFTs::D_SLEEF(size);
#endif
} else if (impl == "kissfft") {
#ifdef HAVE_KISSFFT
@@ -2650,6 +2962,14 @@ FFT::tune()
candidates["fftw"] = d;
#endif
#ifdef HAVE_SLEEF
os << "Constructing new SLEEF FFT object for size " << size << "..." << std::endl;
d = new FFTs::D_SLEEF(size);
d->initFloat();
d->initDouble();
candidates["sleef"] = d;
#endif
#ifdef HAVE_KISSFFT
os << "Constructing new KISSFFT object for size " << size << "..." << std::endl;
d = new FFTs::D_KISSFFT(size);

View File

@@ -50,23 +50,23 @@ Profiler::m_worstCalls;
static Mutex profileMutex;
void
Profiler::add(const char *id, float ms)
Profiler::add(const char *id, double us)
{
profileMutex.lock();
ProfileMap::iterator pmi = m_profiles.find(id);
if (pmi != m_profiles.end()) {
++pmi->second.first;
pmi->second.second += ms;
pmi->second.second += us;
} else {
m_profiles[id] = TimePair(1, ms);
m_profiles[id] = TimePair(1, us);
}
WorstCallMap::iterator wci = m_worstCalls.find(id);
if (wci != m_worstCalls.end()) {
if (ms > wci->second) wci->second = ms;
if (us > wci->second) wci->second = us;
} else {
m_worstCalls[id] = ms;
m_worstCalls[id] = us;
}
profileMutex.unlock();
@@ -95,11 +95,13 @@ Profiler::getReport()
#endif
report += buffer;
typedef std::multimap<float, const char *> TimeRMap;
typedef std::multimap<double, const char *> TimeRMap;
typedef std::multimap<int, const char *> IntRMap;
TimeRMap totmap, avgmap, worstmap;
IntRMap ncallmap;
const unsigned char mu_s[] = { 0xce, 0xbc, 's', 0x0 };
for (ProfileMap::const_iterator i = m_profiles.begin();
i != m_profiles.end(); ++i) {
totmap.insert(TimeRMap::value_type(i->second.second, i->first));
@@ -113,38 +115,6 @@ Profiler::getReport()
worstmap.insert(TimeRMap::value_type(i->second, i->first));
}
snprintf(buffer, buflen, "\nBy total:\n");
report += buffer;
for (TimeRMap::const_iterator i = totmap.end(); i != totmap.begin(); ) {
--i;
snprintf(buffer, buflen, "%-40s %f ms\n", i->second, i->first);
report += buffer;
}
snprintf(buffer, buflen, "\nBy average:\n");
report += buffer;
for (TimeRMap::const_iterator i = avgmap.end(); i != avgmap.begin(); ) {
--i;
snprintf(buffer, buflen, "%-40s %f ms\n", i->second, i->first);
report += buffer;
}
snprintf(buffer, buflen, "\nBy worst case:\n");
report += buffer;
for (TimeRMap::const_iterator i = worstmap.end(); i != worstmap.begin(); ) {
--i;
snprintf(buffer, buflen, "%-40s %f ms\n", i->second, i->first);
report += buffer;
}
snprintf(buffer, buflen, "\nBy number of calls:\n");
report += buffer;
for (IntRMap::const_iterator i = ncallmap.end(); i != ncallmap.begin(); ) {
--i;
snprintf(buffer, buflen, "%-40s %d\n", i->second, i->first);
report += buffer;
}
snprintf(buffer, buflen, "\nBy name:\n");
report += buffer;
@@ -165,15 +135,47 @@ Profiler::getReport()
const TimePair &pp(j->second);
snprintf(buffer, buflen, "%s(%d):\n", *i, pp.first);
report += buffer;
snprintf(buffer, buflen, "\tReal: \t%f ms \t[%f ms total]\n",
(pp.second / pp.first),
(pp.second));
snprintf(buffer, buflen, "\tReal: \t%12f %s \t[%f %s total]\n",
(pp.second / pp.first), mu_s,
(pp.second), mu_s);
report += buffer;
WorstCallMap::const_iterator k = m_worstCalls.find(*i);
if (k == m_worstCalls.end()) continue;
snprintf(buffer, buflen, "\tWorst:\t%f ms/call\n", k->second);
snprintf(buffer, buflen, "\tWorst:\t%14f %s/call\n", k->second, mu_s);
report += buffer;
}
snprintf(buffer, buflen, "\nBy total:\n");
report += buffer;
for (TimeRMap::const_iterator i = totmap.end(); i != totmap.begin(); ) {
--i;
snprintf(buffer, buflen, "%-40s %14f %s\n", i->second, i->first, mu_s);
report += buffer;
}
snprintf(buffer, buflen, "\nBy average:\n");
report += buffer;
for (TimeRMap::const_iterator i = avgmap.end(); i != avgmap.begin(); ) {
--i;
snprintf(buffer, buflen, "%-40s %14f %s\n", i->second, i->first, mu_s);
report += buffer;
}
snprintf(buffer, buflen, "\nBy worst case:\n");
report += buffer;
for (TimeRMap::const_iterator i = worstmap.end(); i != worstmap.begin(); ) {
--i;
snprintf(buffer, buflen, "%-40s %14f %s\n", i->second, i->first, mu_s);
report += buffer;
}
snprintf(buffer, buflen, "\nBy number of calls:\n");
report += buffer;
for (IntRMap::const_iterator i = ncallmap.end(); i != ncallmap.begin(); ) {
--i;
snprintf(buffer, buflen, "%-40s %14d\n", i->second, i->first);
report += buffer;
}
@@ -186,11 +188,7 @@ Profiler::Profiler(const char* c) :
m_c(c),
m_ended(false)
{
#ifdef PROFILE_CLOCKS
m_start = clock();
#else
(void)gettimeofday(&m_start, 0);
#endif
m_start = std::chrono::steady_clock::now();
}
Profiler::~Profiler()
@@ -201,25 +199,9 @@ Profiler::~Profiler()
void
Profiler::end()
{
#ifdef PROFILE_CLOCKS
clock_t end = clock();
clock_t elapsed = end - m_start;
float ms = float((double(elapsed) / double(CLOCKS_PER_SEC)) * 1000.0);
#else
struct timeval tv;
(void)gettimeofday(&tv, 0);
tv.tv_sec -= m_start.tv_sec;
if (tv.tv_usec < m_start.tv_usec) {
tv.tv_usec += 1000000;
tv.tv_sec -= 1;
}
tv.tv_usec -= m_start.tv_usec;
float ms = float((double(tv.tv_sec) + (double(tv.tv_usec) / 1000000.0)) * 1000.0);
#endif
add(m_c, ms);
auto finish = std::chrono::steady_clock::now();
std::chrono::duration<double, std::micro> us = finish - m_start;
add(m_c, us.count());
m_ended = true;
}

View File

@@ -39,14 +39,7 @@
#endif
#ifndef NO_TIMING
#ifdef PROFILE_CLOCKS
#include <time.h>
#else
#include "sysutils.h"
#ifndef _WIN32
#include <sys/time.h>
#endif
#endif
#include <chrono>
#endif
#ifndef NO_TIMING
@@ -75,21 +68,17 @@ public:
static std::string getReport();
protected:
const char* m_c;
#ifdef PROFILE_CLOCKS
clock_t m_start;
#else
struct timeval m_start;
#endif
const char *const m_c;
std::chrono::time_point<std::chrono::steady_clock> m_start;
bool m_showOnDestruct;
bool m_ended;
typedef std::pair<int, float> TimePair;
typedef std::pair<int, double> TimePair;
typedef std::map<const char *, TimePair> ProfileMap;
typedef std::map<const char *, float> WorstCallMap;
typedef std::map<const char *, double> WorstCallMap;
static ProfileMap m_profiles;
static WorstCallMap m_worstCalls;
static void add(const char *, float);
static void add(const char *, double);
};
#else

View File

@@ -26,6 +26,8 @@
#include "Allocators.h"
#include "VectorOps.h"
#include "mathmisc.h"
#include <cstdlib>
#include <cmath>
@@ -55,6 +57,10 @@
#ifdef USE_SPEEX
#include "../ext/speex/speex_resampler.h"
#else
#ifdef HAVE_LIBSPEEXDSP
#include <speex/speex_resampler.h>
#endif
#endif
#ifdef USE_BQRESAMPLER
@@ -64,6 +70,7 @@
#ifndef HAVE_IPP
#ifndef HAVE_LIBSAMPLERATE
#ifndef HAVE_LIBRESAMPLE
#ifndef HAVE_LIBSPEEXDSP
#ifndef USE_SPEEX
#ifndef USE_BQRESAMPLER
#error No resampler implementation selected!
@@ -72,6 +79,7 @@
#endif
#endif
#endif
#endif
#define BQ_R__ R__
@@ -1106,7 +1114,7 @@ D_BQResampler::reset()
#endif /* USE_BQRESAMPLER */
#ifdef USE_SPEEX
#if defined(USE_SPEEX) || defined(HAVE_LIBSPEEXDSP)
class D_Speex : public Resampler::Impl
{
@@ -1214,19 +1222,23 @@ D_Speex::setRatio(double ratio)
// Speex wants a ratio of two unsigned integers, not a single
// float. Let's do that.
unsigned int big = 272408136U;
unsigned int denom = 1, num = 1;
if (ratio < 1.f) {
denom = big;
double dnum = double(big) * double(ratio);
num = (unsigned int)dnum;
} else if (ratio > 1.f) {
num = big;
double ddenom = double(big) / double(ratio);
denom = (unsigned int)ddenom;
int max_denom = 48000;
if (ratio > 1.0) {
max_denom = int(ceil(48000 / ratio));
}
int inum, idenom;
pickNearestRational(ratio, max_denom, inum, idenom);
if (inum < 0 || idenom < 0) {
cerr << "Resampler::setRatio: Internal error: "
<< "numerator or denominator < 0 ("
<< inum << "/" << idenom << ")" << endl;
return;
}
unsigned int num = inum, denom = idenom;
if (m_debugLevel > 1) {
cerr << "D_Speex: Desired ratio " << ratio << ", requesting ratio "
<< num << "/" << denom << " = " << float(double(num)/double(denom))
@@ -1241,7 +1253,11 @@ D_Speex::setRatio(double ratio)
if (err) {
cerr << "Resampler::Resampler: failed to set rate on Speex resampler"
<< endl;
<< " (with ratio = " << ratio << " [ratio-1 = " << ratio - 1.0
<< "], denom = " << denom
<< ", num = " << num << ", fromRate = " << fromRate
<< ", toRate = " << toRate << ", err = " << err
<< ")" << endl;
#ifndef NO_EXCEPTIONS
throw Resampler::ImplementationError;
#endif
@@ -1404,6 +1420,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
#ifdef USE_SPEEX
m_method = 2;
#endif
#ifdef HAVE_LIBSPEEXDSP
m_method = 2;
#endif
#ifdef HAVE_LIBRESAMPLE
m_method = 3;
#endif
@@ -1425,6 +1444,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
#ifdef USE_SPEEX
m_method = 2;
#endif
#ifdef HAVE_LIBSPEEXDSP
m_method = 2;
#endif
#ifdef USE_BQRESAMPLER
m_method = 4;
#endif
@@ -1443,6 +1465,9 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
#ifdef USE_SPEEX
m_method = 2;
#endif
#ifdef HAVE_LIBSPEEXDSP
m_method = 2;
#endif
#ifdef USE_BQRESAMPLER
m_method = 4;
#endif
@@ -1483,7 +1508,7 @@ Resampler::Resampler(Resampler::Parameters params, int channels)
break;
case 2:
#ifdef USE_SPEEX
#if defined(USE_SPEEX) || defined(HAVE_LIBSPEEXDSP)
d = new Resamplers::D_Speex
(params.quality, params.ratioChange,
channels,

69
src/common/mathmisc.cpp Normal file
View File

@@ -0,0 +1,69 @@
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2022 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#include "mathmisc.h"
namespace RubberBand {
/**
 * Find the fraction num/denom nearest to ratio whose denominator does
 * not exceed max_denom, and return it through the num and denom
 * reference arguments.
 *
 * The search starts from the bounds 0/1 and 1/0 and only ever moves
 * between them, so the result always has num >= 0 and denom >= 1. A
 * ratio below zero, or a max_denom below 1, therefore yields 0/1.
 *
 * Note that for a ratio above 1 the number of iterations grows with
 * the integer part of the ratio, since the lower bound advances by
 * one per step until it passes it.
 */
void pickNearestRational(double ratio, int max_denom, int &num, int &denom)
{
    // Farey algorithm, see
    // https://www.johndcook.com/blog/2010/10/20/best-rational-approximation/
    //
    // a/b is the current lower bound and c/d the current upper bound
    // on ratio. Each step replaces one of them with their mediant
    // (a+c)/(b+d). pa/pb and pc/pd remember the value each bound had
    // before it was last replaced.
    double a = 0.0, b = 1.0, c = 1.0, d = 0.0;
    double pa = a, pb = b, pc = c, pd = d;
    double eps = 1e-9;

    while (b <= max_denom && d <= max_denom) {
        double mediant = (a + c) / (b + d);
        if (fabs(ratio - mediant) < eps) {
            // The mediant matches to within tolerance: use it if its
            // denominator is permitted, otherwise fall back to the
            // bound with the larger denominator
            if (b + d <= max_denom) {
                num = int(a + c);
                denom = int(b + d);
                return;
            } else if (d > b) {
                num = int(c);
                denom = int(d);
                return;
            } else {
                num = int(a);
                denom = int(b);
                return;
            }
        }
        if (ratio > mediant) {
            pa = a; pb = b;
            a += c; b += d;
        } else {
            pc = c; pd = d;
            c += a; d += b;
        }
    }

    // At most one bound has just stepped past max_denom. For that
    // side we must fall back to its previous value; for the other
    // side the current value is still valid and is the tightest bound
    // we have. (Comparing the two previous values instead would use a
    // stale bound on the side that did not overshoot, e.g. returning
    // 25/8 rather than 22/7 for pi with max_denom 10.)
    double lo_n = a, lo_d = b;
    double hi_n = c, hi_d = d;
    if (b > max_denom) {
        lo_n = pa; lo_d = pb;
    }
    if (d > max_denom) {
        hi_n = pc; hi_d = pd;
    }

    // hi_d is zero only while the upper bound is still the initial
    // 1/0, i.e. "infinity", which can never be the nearer candidate
    bool upperNearer =
        (hi_d > 0.0) &&
        (fabs(ratio - (hi_n / hi_d)) < fabs(ratio - (lo_n / lo_d)));

    if (upperNearer) {
        num = int(hi_n);
        denom = int(hi_d);
    } else {
        num = int(lo_n);
        denom = int(lo_d);
    }
}
}

View File

@@ -24,6 +24,8 @@
#ifndef RUBBERBAND_MATHMISC_H
#define RUBBERBAND_MATHMISC_H
#include "sysutils.h"
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif // M_PI
@@ -51,6 +53,8 @@ inline double frequencyForBin(int b, int fftSize, double sampleRate) {
return (double(b) * sampleRate) / double(fftSize);
}
void pickNearestRational(double ratio, int maxDenom, int &num, int &denom);
}
#endif

View File

@@ -101,6 +101,8 @@ R2Stretcher::R2Stretcher(size_t sampleRate,
m_freq2(12000),
m_baseFftSize(m_defaultFftSize)
{
Profiler profiler("R2Stretcher::R2Stretcher");
if (!_initialised) {
system_specific_initialise();
_initialised = true;
@@ -316,8 +318,7 @@ R2Stretcher::setMaxProcessSize(size_t samples)
}
void
R2Stretcher::setKeyFrameMap(const std::map<size_t, size_t> &
mapping)
R2Stretcher::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
{
if (m_realtime) {
m_log.log(0, "R2Stretcher::setKeyFrameMap: Cannot specify key frame map in RT mode");

View File

@@ -87,6 +87,8 @@ R2Stretcher::ChannelData::construct(const std::set<size_t> &sizes,
interpolator = allocate_and_zero<float>(maxSize);
interpolatorScale = 0;
unityResetLow = 16000.f;
for (std::set<size_t>::const_iterator i = sizes.begin();
i != sizes.end(); ++i) {
ffts[*i] = new FFT(*i);

View File

@@ -113,6 +113,7 @@ public:
float *ms; // only used when mid-side processing
float *interpolator; // only used when time-domain smoothing is on
int interpolatorScale;
float unityResetLow; // for gradual phase-reset on unity ratio
float *fltbuf;
process_t *dblbuf; // owned by FFT object, only used for time domain FFT i/o

View File

@@ -744,12 +744,29 @@ R2Stretcher::modifyChunk(size_t channel,
int bandlow = lrint((150 * m_fftSize) / rate);
int bandhigh = lrint((1000 * m_fftSize) / rate);
float r = getEffectiveRatio();
bool unity = (fabsf(r - 1.f) < 1.e-6f);
if (unity) {
if (!phaseReset) {
phaseReset = true;
bandlimited = true;
bandlow = lrint((cd.unityResetLow * m_fftSize) / rate);
bandhigh = count;
if (bandlow > 0) {
m_log.log(2, "unity: bandlow & high", bandlow, bandhigh);
}
}
cd.unityResetLow *= 0.9f;
} else {
cd.unityResetLow = 16000.f;
}
float freq0 = m_freq0;
float freq1 = m_freq1;
float freq2 = m_freq2;
if (laminar) {
float r = getEffectiveRatio();
if (r > 1) {
float rf0 = 600 + (600 * ((r-1)*(r-1)*(r-1)*2));
float f1ratio = freq1 / freq0;
@@ -1085,7 +1102,9 @@ R2Stretcher::writeChunk(size_t channel, size_t shiftIncrement, bool last)
}
void
R2Stretcher::writeOutput(RingBuffer<float> &to, float *from, size_t qty, size_t &outCount, size_t theoreticalOut)
R2Stretcher::writeOutput(RingBuffer<float> &to,
float *from, size_t qty,
size_t &outCount, size_t theoreticalOut)
{
Profiler profiler("R2Stretcher::writeOutput");

View File

@@ -27,6 +27,7 @@
#include "../common/Allocators.h"
#include "../common/MovingMedian.h"
#include "../common/RingBuffer.h"
#include "../common/Profiler.h"
#include <vector>
#include <memory>
@@ -97,6 +98,8 @@ public:
void classify(const process_t *const mag, // input, of at least binCount bins
Classification *classification) // output, of binCount bins
{
Profiler profiler("BinClassifier::classify");
const int n = m_parameters.binCount;
for (int i = 0; i < n; ++i) {

View File

@@ -28,6 +28,7 @@
#include "../common/HistogramFilter.h"
#include "../common/mathmisc.h"
#include "../common/Profiler.h"
#include <vector>
@@ -65,6 +66,9 @@ public:
}
Segmentation segment(const BinClassifier::Classification *classification) {
Profiler profiler("BinSegmenter::segment");
int n = m_parameters.binCount;
for (int i = 0; i < n; ++i) {
switch (classification[i]) {

View File

@@ -25,6 +25,7 @@
#define RUBBERBAND_GUIDE_H
#include "../common/Log.h"
#include "../common/Profiler.h"
#include <functional>
#include <sstream>
@@ -68,7 +69,9 @@ public:
struct Guidance {
FftBand fftBands[3];
int fftBandCount;
PhaseLockBand phaseLockBands[4];
int phaseLockBandCount;
Range kick;
Range preKick;
Range highUnlocked;
@@ -95,50 +98,92 @@ public:
int shortestFftSize;
int classificationFftSize;
BandLimits fftBandLimits[3];
Configuration(int _longestFftSize, int _shortestFftSize,
int _classificationFftSize) :
longestFftSize(_longestFftSize),
shortestFftSize(_shortestFftSize),
classificationFftSize(_classificationFftSize) { }
int fftBandLimitCount;
Configuration() :
longestFftSize(0), shortestFftSize(0), classificationFftSize(0),
fftBandLimitCount(0) { }
};
struct Parameters {
double sampleRate;
Parameters(double _sampleRate) : sampleRate(_sampleRate) { }
bool singleWindowMode;
Parameters(double _sampleRate, bool _singleWindow) :
sampleRate(_sampleRate),
singleWindowMode(_singleWindow) { }
};
Guide(Parameters parameters, Log log) :
m_parameters(parameters),
m_log(log),
m_configuration(roundUp(int(ceil(parameters.sampleRate / 16.0))),
roundUp(int(ceil(parameters.sampleRate / 64.0))),
roundUp(int(ceil(parameters.sampleRate / 32.0)))),
m_minLower(500.0), m_minHigher(4000.0),
m_defaultLower(700.0), m_defaultHigher(4800.0),
m_maxLower(1100.0), m_maxHigher(7000.0)
m_log(log)
{
double rate = m_parameters.sampleRate;
double nyquist = rate / 2.0;
m_log.log(1, "Guide: rate", rate);
m_log.log(1, "Guide: rate and single-window mode",
rate, m_parameters.singleWindowMode);
int bandFftSize = roundUp(int(ceil(rate/16.0)));
m_configuration.fftBandLimits[0] =
BandLimits(bandFftSize, rate, 0.0, m_maxLower);
int classificationFftSize =
roundUp(int(ceil(parameters.sampleRate / 32.0)));
// This is the classification and fallback FFT: we need it to
// go up to Nyquist so we can seamlessly switch to it for
// longer stretches, and down to 0.0 so we can use it for
// unity in offline mode
bandFftSize = roundUp(int(ceil(rate/32.0)));
m_configuration.fftBandLimits[1] =
BandLimits(bandFftSize, rate, 0.0, rate / 2.0);
bandFftSize = roundUp(int(ceil(rate/64.0)));
m_configuration.fftBandLimits[2] =
BandLimits(bandFftSize, rate, m_minHigher, rate/2.0);
m_configuration.classificationFftSize = classificationFftSize;
m_log.log(1, "Guide: classification FFT size",
m_configuration.classificationFftSize);
if (m_parameters.singleWindowMode) {
// Single-window mode
m_configuration.longestFftSize = classificationFftSize;
m_configuration.shortestFftSize = classificationFftSize;
m_defaultLower = nyquist;
m_minLower = m_defaultLower;
m_maxLower = m_defaultLower;
m_defaultHigher = nyquist;
m_minHigher = m_defaultHigher;
m_maxHigher = m_defaultHigher;
m_configuration.fftBandLimitCount = 1;
m_configuration.fftBandLimits[0] =
BandLimits(classificationFftSize, rate, 0.0, nyquist);
} else {
// The normal multi-window mode
m_configuration.longestFftSize = classificationFftSize * 2;
m_configuration.shortestFftSize = classificationFftSize / 2;
m_defaultLower = 700.0;
m_minLower = 500.0;
m_maxLower = 1100.0;
m_defaultHigher = 4800.0;
m_minHigher = 4000.0;
m_maxHigher = 7000.0;
m_configuration.fftBandLimitCount = 3;
m_configuration.fftBandLimits[0] =
BandLimits(m_configuration.longestFftSize,
rate, 0.0, m_maxLower);
// This is the classification and fallback FFT: we need it
// to go up to Nyquist so we can seamlessly switch to it
// for longer stretches, and down to 0.0 so we can use it
// for unity in offline mode
m_configuration.fftBandLimits[1] =
BandLimits(classificationFftSize,
rate, 0.0, nyquist);
m_configuration.fftBandLimits[2] =
BandLimits(m_configuration.shortestFftSize,
rate, m_minHigher, nyquist);
}
}
const Configuration &getConfiguration() const {
@@ -159,6 +204,8 @@ public:
bool tighterChannelLock,
Guidance &guidance) const {
Profiler profiler("Guide::updateGuidance");
bool hadPhaseReset = guidance.phaseReset.present;
guidance.phaseReset.present = false;
@@ -168,12 +215,57 @@ public:
guidance.channelLock.present = false;
double nyquist = m_parameters.sampleRate / 2.0;
guidance.fftBands[0].fftSize = roundUp(int(ceil(nyquist/8.0)));
guidance.fftBands[1].fftSize = roundUp(int(ceil(nyquist/16.0)));
guidance.fftBands[2].fftSize = roundUp(int(ceil(nyquist/32.0)));
if (m_parameters.singleWindowMode) {
// All the fft and phase-lock bands are fixed in this
// mode. We'll still need to continue to set up phase
// reset ranges etc, including the unity case.
guidance.fftBandCount = 1;
guidance.fftBands[0].fftSize = m_configuration.classificationFftSize;
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = nyquist;
guidance.phaseLockBandCount = 3;
guidance.phaseLockBands[0].p = 1;
guidance.phaseLockBands[0].beta = betaFor(1200.0, ratio);
guidance.phaseLockBands[0].f0 = 0.0;
guidance.phaseLockBands[0].f1 = 1600.0;
guidance.phaseLockBands[1].p = 2;
guidance.phaseLockBands[1].beta = betaFor(4800.0, ratio);
guidance.phaseLockBands[1].f0 = 1600.0;
guidance.phaseLockBands[1].f1 = 7000.0;
guidance.phaseLockBands[2].p = 5;
guidance.phaseLockBands[2].beta = betaFor(10000.0, ratio);
guidance.phaseLockBands[2].f0 = 7000.0;
guidance.phaseLockBands[2].f1 = nyquist;
if (outhop > 256) {
guidance.phaseLockBands[2].p = 4;
}
} else {
// The normal multi-window mode
guidance.fftBandCount = 3;
guidance.fftBands[0].fftSize = m_configuration.longestFftSize;
guidance.fftBands[1].fftSize = m_configuration.classificationFftSize;
guidance.fftBands[2].fftSize = m_configuration.shortestFftSize;
guidance.phaseLockBandCount = 4;
// This is a vital stop case for PhaseAdvance
guidance.phaseLockBands[3].f1 = nyquist;
}
// We've set the counts, and for single-window mode we've set
// the band ranges as well - in normal multi-window mode we
// still have to do that, but we should do these first
if (meanMagnitude < 1.0e-6) {
updateForSilence(guidance);
@@ -183,8 +275,6 @@ public:
if (unityCount > 0) {
updateForUnity(guidance,
hadPhaseReset,
unityCount,
magnitudes,
segmentation,
realtime);
return;
@@ -199,6 +289,8 @@ public:
guidance.channelLock.f1 = 600.0;
}
if (!m_parameters.singleWindowMode) {
bool kick =
(segmentation.percussiveBelow > 40.0) &&
(prevSegmentation.percussiveBelow < 40.0) &&
@@ -208,6 +300,18 @@ public:
(nextSegmentation.percussiveBelow > 40.0) &&
(segmentation.percussiveBelow < 40.0) &&
checkPotentialKick(nextMagnitudes, magnitudes);
if (kick) {
guidance.kick.present = true;
guidance.kick.f0 = 0.0;
guidance.kick.f1 = segmentation.percussiveBelow;
} else if (futureKick) {
guidance.preKick.present = true;
guidance.preKick.f0 = 0.0;
guidance.preKick.f1 = nextSegmentation.percussiveBelow;
}
}
/*
std::cout << "d:"
<< prevSegmentation.percussiveBelow << ","
@@ -218,15 +322,6 @@ public:
<< (kick ? "K" : "N") << ","
<< (futureKick ? "F" : "N") << std::endl;
*/
if (kick) {
guidance.kick.present = true;
guidance.kick.f0 = 0.0;
guidance.kick.f1 = segmentation.percussiveBelow;
} else if (futureKick) {
guidance.preKick.present = true;
guidance.preKick.f0 = 0.0;
guidance.preKick.f1 = nextSegmentation.percussiveBelow;
}
if (segmentation.residualAbove > segmentation.percussiveAbove) {
guidance.highUnlocked.present = true;
@@ -249,6 +344,11 @@ public:
}
}
if (!m_parameters.singleWindowMode) {
// The normal multi-window mode. For single-window we did
// this already.
double prevLower = guidance.fftBands[0].f1;
double lower = descendToValley(prevLower, magnitudes);
if (lower > m_maxLower || lower < m_minLower) {
@@ -264,8 +364,6 @@ public:
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = lower;
// std::cout << "x:" << lower << std::endl;
guidance.fftBands[1].f0 = lower;
guidance.fftBands[1].f1 = higher;
@@ -290,7 +388,7 @@ public:
guidance.phaseLockBands[1].f1 = mid;
guidance.phaseLockBands[2].p = 3;
guidance.phaseLockBands[2].beta = betaFor(5000.0, ratio);
guidance.phaseLockBands[2].beta = betaFor(4800.0, ratio);
guidance.phaseLockBands[2].f0 = mid;
guidance.phaseLockBands[2].f1 = higher;
@@ -302,6 +400,7 @@ public:
if (outhop > 256) {
guidance.phaseLockBands[3].p = 3;
}
}
if (ratio > 2.0) {
@@ -326,7 +425,7 @@ public:
guidance.highUnlocked.present = true;
}
/*
/*
std::ostringstream str;
str << "Guidance: FFT bands: ["
<< guidance.fftBands[0].fftSize << " from "
@@ -341,8 +440,9 @@ public:
<< guidance.phaseReset.present << " from "
<< guidance.phaseReset.f0 << " to " << guidance.phaseReset.f1
<< "]" << std::endl;
m_parameters.logger(str.str());
*/
m_log.log(1, str.str().c_str());
*/
}
void setDebugLevel(int level) {
@@ -374,12 +474,14 @@ protected:
void updateForSilence(Guidance &guidance) const {
// std::cout << "phase reset on silence" << std::endl;
double nyquist = m_parameters.sampleRate / 2.0;
if (!m_parameters.singleWindowMode) {
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = 0.0;
guidance.fftBands[1].f0 = 0.0;
guidance.fftBands[1].f1 = nyquist;
guidance.fftBands[2].f0 = nyquist;
guidance.fftBands[2].f1 = nyquist;
}
guidance.phaseReset.present = true;
guidance.phaseReset.f0 = 0.0;
guidance.phaseReset.f1 = nyquist;
@@ -387,8 +489,6 @@ protected:
void updateForUnity(Guidance &guidance,
bool hadPhaseReset,
uint32_t /* unityCount */,
const process_t *const /* magnitudes */,
const BinSegmenter::Segmentation &segmentation,
bool realtime) const {
@@ -399,24 +499,28 @@ protected:
if (!realtime) {
// ratio can't change, so we are just running 1.0 ratio
// throughout
if (!m_parameters.singleWindowMode) {
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = 0.0;
guidance.fftBands[1].f0 = 0.0;
guidance.fftBands[1].f1 = nyquist;
guidance.fftBands[2].f0 = nyquist;
guidance.fftBands[2].f1 = nyquist;
}
guidance.phaseReset.present = true;
guidance.phaseReset.f0 = 0.0;
guidance.phaseReset.f1 = nyquist;
return;
}
if (!m_parameters.singleWindowMode) {
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = m_minLower;
guidance.fftBands[1].f0 = m_minLower;
guidance.fftBands[1].f1 = m_minHigher;
guidance.fftBands[2].f0 = m_minHigher;
guidance.fftBands[2].f1 = nyquist;
}
guidance.phaseReset.present = true;

View File

@@ -28,6 +28,7 @@
#include "../common/Log.h"
#include "../common/mathmisc.h"
#include "../common/Profiler.h"
#include <sstream>
#include <functional>
@@ -42,8 +43,11 @@ public:
int fftSize;
double sampleRate;
int channels;
Parameters(int _fftSize, double _sampleRate, int _channels) :
fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels) { }
bool singleWindowMode;
Parameters(int _fftSize, double _sampleRate, int _channels,
bool _singleWindow) :
fftSize(_fftSize), sampleRate(_sampleRate), channels(_channels),
singleWindowMode(_singleWindow) { }
};
GuidedPhaseAdvance(Parameters parameters, Log log) :
@@ -93,6 +97,8 @@ public:
int inhop,
int outhop) {
Profiler profiler("GuidedPhaseAdvance::advance");
int myFftBand = 0;
int i = 0;
for (const auto &fband : guidance[0]->fftBands) {

View File

@@ -24,6 +24,7 @@
#include "R3Stretcher.h"
#include "../common/VectorOpsComplex.h"
#include "../common/Profiler.h"
#include <array>
@@ -34,13 +35,18 @@ R3Stretcher::R3Stretcher(Parameters parameters,
double initialPitchScale,
Log log) :
m_parameters(parameters),
m_limits(parameters.options),
m_log(log),
m_timeRatio(initialTimeRatio),
m_pitchScale(initialPitchScale),
m_formantScale(0.0),
m_guide(Guide::Parameters(m_parameters.sampleRate), m_log),
m_guide(Guide::Parameters
(m_parameters.sampleRate,
m_parameters.options & RubberBandStretcher::OptionWindowShort),
m_log),
m_guideConfiguration(m_guide.getConfiguration()),
m_channelAssembly(m_parameters.channels),
m_useReadahead(true),
m_inhop(1),
m_prevInhop(1),
m_prevOuthop(1),
@@ -54,11 +60,23 @@ R3Stretcher::R3Stretcher(Parameters parameters,
m_totalOutputDuration(0),
m_mode(ProcessMode::JustCreated)
{
Profiler profiler("R3Stretcher::R3Stretcher");
m_log.log(1, "R3Stretcher::R3Stretcher: rate, options",
m_parameters.sampleRate, m_parameters.options);
m_log.log(1, "R3Stretcher::R3Stretcher: initial time ratio and pitch scale",
m_timeRatio, m_pitchScale);
if (isRealTime()) {
m_log.log(1, "R3Stretcher::R3Stretcher: real-time mode");
} else {
m_log.log(1, "R3Stretcher::R3Stretcher: offline mode");
}
if (isSingleWindowed()) {
m_log.log(1, "R3Stretcher::R3Stretcher: intermediate shorter-window mode requested");
}
double maxClassifierFrequency = 16000.0;
if (maxClassifierFrequency > m_parameters.sampleRate/2) {
maxClassifierFrequency = m_parameters.sampleRate/2;
@@ -74,17 +92,23 @@ R3Stretcher::R3Stretcher(Parameters parameters,
BinClassifier::Parameters classifierParameters
(classificationBins, 9, 1, 10, 2.0, 2.0);
int inRingBufferSize = m_guideConfiguration.longestFftSize * 2;
int outRingBufferSize = m_guideConfiguration.longestFftSize * 16;
if (isSingleWindowed()) {
classifierParameters.horizontalFilterLength = 7;
}
int inRingBufferSize = getWindowSourceSize() * 2;
int outRingBufferSize = getWindowSourceSize() * 16;
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData.push_back(std::make_shared<ChannelData>
(segmenterParameters,
classifierParameters,
m_guideConfiguration.longestFftSize,
getWindowSourceSize(),
inRingBufferSize,
outRingBufferSize));
for (auto band: m_guideConfiguration.fftBandLimits) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
int fftSize = band.fftSize;
m_channelData[c]->scales[fftSize] =
std::make_shared<ChannelScaleData>
@@ -92,10 +116,12 @@ R3Stretcher::R3Stretcher(Parameters parameters,
}
}
for (auto band: m_guideConfiguration.fftBandLimits) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
int fftSize = band.fftSize;
GuidedPhaseAdvance::Parameters guidedParameters
(fftSize, m_parameters.sampleRate, m_parameters.channels);
(fftSize, m_parameters.sampleRate, m_parameters.channels,
isSingleWindowed());
m_scaleData[fftSize] = std::make_shared<ScaleData>
(guidedParameters, m_log);
}
@@ -127,30 +153,42 @@ R3Stretcher::R3Stretcher(Parameters parameters,
}
WindowType
R3Stretcher::ScaleData::analysisWindowShape(int fftSize)
R3Stretcher::ScaleData::analysisWindowShape()
{
if (singleWindowMode) {
return HannWindow;
} else {
if (fftSize > 2048) return HannWindow;
else return NiemitaloForwardWindow;
}
}
int
R3Stretcher::ScaleData::analysisWindowLength(int fftSize)
R3Stretcher::ScaleData::analysisWindowLength()
{
return fftSize;
}
WindowType
R3Stretcher::ScaleData::synthesisWindowShape(int fftSize)
R3Stretcher::ScaleData::synthesisWindowShape()
{
if (singleWindowMode) {
return HannWindow;
} else {
if (fftSize > 2048) return HannWindow;
else return NiemitaloReverseWindow;
}
}
int
R3Stretcher::ScaleData::synthesisWindowLength(int fftSize)
R3Stretcher::ScaleData::synthesisWindowLength()
{
if (singleWindowMode) {
return fftSize;
} else {
if (fftSize > 2048) return fftSize/2;
else return fftSize;
}
}
void
@@ -233,6 +271,8 @@ R3Stretcher::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
void
R3Stretcher::createResampler()
{
Profiler profiler("R3Stretcher::createResampler");
Resampler::Parameters resamplerParameters;
if (m_parameters.options & RubberBandStretcher::OptionPitchHighQuality) {
@@ -245,14 +285,12 @@ R3Stretcher::createResampler()
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize;
if (isRealTime()) {
if (m_parameters.options &
RubberBandStretcher::OptionPitchHighConsistency) {
// If we knew the caller would never change ratio, we could
// supply RatioMostlyFixed - but it can have such overhead
// when the ratio *does* change (and it's not RT-safe overhead
// either) that a single call would kill RT use
resamplerParameters.dynamism = Resampler::RatioOftenChanging;
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
} else {
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
}
} else {
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
@@ -260,6 +298,20 @@ R3Stretcher::createResampler()
m_resampler = std::unique_ptr<Resampler>
(new Resampler(resamplerParameters, m_parameters.channels));
bool before, after;
areWeResampling(&before, &after);
if (before) {
if (after) {
m_log.log(0, "WARNING: createResampler: we think we are resampling both before and after!");
} else {
m_log.log(1, "createResampler: resampling before");
}
} else {
if (after) {
m_log.log(1, "createResampler: resampling after");
}
}
}
void
@@ -284,24 +336,45 @@ R3Stretcher::calculateHop()
} else if (ratio < 1.0) {
proposedOuthop = pow(2.0, 8.0 + 2.0 * log10(ratio));
}
if (proposedOuthop > 512.0) proposedOuthop = 512.0;
if (proposedOuthop < 128.0) proposedOuthop = 128.0;
if (isSingleWindowed()) {
// the single (shorter) window mode actually uses a longer
// synthesis window for the 2048-bin FFT and drops the
// 1024-bin one, so it can survive longer hops, which is good
// because reduced CPU consumption is the whole motivation
proposedOuthop *= 2.0;
}
if (proposedOuthop > m_limits.maxPreferredOuthop) {
proposedOuthop = m_limits.maxPreferredOuthop;
}
if (proposedOuthop < m_limits.minPreferredOuthop) {
proposedOuthop = m_limits.minPreferredOuthop;
}
m_log.log(1, "calculateHop: ratio and proposed outhop", ratio, proposedOuthop);
double inhop = proposedOuthop / ratio;
if (inhop < 1.0) {
m_log.log(0, "WARNING: Extreme ratio yields ideal inhop < 1, results may be suspect", ratio, inhop);
inhop = 1.0;
if (inhop < m_limits.minInhop) {
m_log.log(0, "WARNING: Ratio yields ideal inhop < minimum, results may be suspect", inhop, m_limits.minInhop);
inhop = m_limits.minInhop;
}
if (inhop > 1024.0) {
m_log.log(0, "WARNING: Extreme ratio yields ideal inhop > 1024, results may be suspect", ratio, inhop);
inhop = 1024.0;
if (inhop > m_limits.maxInhop) {
// Log level 1, this is not as big a deal as < minInhop above
m_log.log(1, "WARNING: Ratio yields ideal inhop > maximum, results may be suspect", inhop, m_limits.maxInhop);
inhop = m_limits.maxInhop;
}
m_inhop = int(floor(inhop));
m_log.log(1, "calculateHop: inhop and mean outhop", m_inhop, m_inhop * ratio);
if (m_inhop < m_limits.maxInhopWithReadahead) {
m_log.log(1, "calculateHop: using readahead");
m_useReadahead = true;
} else {
m_log.log(1, "calculateHop: not using readahead, inhop too long for buffer in current configuration");
m_useReadahead = false;
}
}
void
@@ -409,7 +482,7 @@ R3Stretcher::getPreferredStartPad() const
if (!isRealTime()) {
return 0;
} else {
return m_guideConfiguration.longestFftSize / 2;
return getWindowSourceSize() / 2;
}
}
@@ -420,7 +493,7 @@ R3Stretcher::getStartDelay() const
return 0;
} else {
double factor = 0.5 / m_pitchScale;
return size_t(ceil(m_guideConfiguration.longestFftSize * factor));
return size_t(ceil(getWindowSourceSize() * factor));
}
}
@@ -463,6 +536,8 @@ R3Stretcher::reset()
void
R3Stretcher::study(const float *const *, size_t samples, bool)
{
Profiler profiler("R3Stretcher::study");
if (isRealTime()) {
m_log.log(0, "R3Stretcher::study: Not meaningful in realtime mode");
return;
@@ -491,10 +566,9 @@ size_t
R3Stretcher::getSamplesRequired() const
{
if (available() != 0) return 0;
int longest = m_guideConfiguration.longestFftSize;
int rs = m_channelData[0]->inbuf->getReadSpace();
if (rs < longest) {
return longest - rs;
if (rs < getWindowSourceSize()) {
return getWindowSourceSize() - rs;
} else {
return 0;
}
@@ -504,7 +578,7 @@ void
R3Stretcher::setMaxProcessSize(size_t n)
{
size_t oldSize = m_channelData[0]->inbuf->getSize();
size_t newSize = m_guideConfiguration.longestFftSize + n;
size_t newSize = getWindowSourceSize() + n;
if (newSize > oldSize) {
m_log.log(1, "setMaxProcessSize: resizing from and to", oldSize, newSize);
@@ -520,6 +594,8 @@ R3Stretcher::setMaxProcessSize(size_t n)
void
R3Stretcher::process(const float *const *input, size_t samples, bool final)
{
Profiler profiler("R3Stretcher::process");
if (m_mode == ProcessMode::Finished) {
m_log.log(0, "R3Stretcher::process: Cannot process again after final chunk");
return;
@@ -557,11 +633,11 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final)
createResampler();
}
// Pad to half the longest frame. As with R2, in real-time
// mode we don't do this -- it's better to start with a
// swoosh than introduce more latency, and we don't want
// gaps when the ratio changes.
int pad = m_guideConfiguration.longestFftSize / 2;
// Pad to half the frame. As with R2, in real-time mode we
// don't do this -- it's better to start with a swoosh
// than introduce more latency, and we don't want gaps
// when the ratio changes.
int pad = getWindowSourceSize() / 2;
m_log.log(1, "offline mode: prefilling with", pad);
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->zero(pad);
@@ -584,21 +660,73 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final)
m_mode = ProcessMode::Processing;
}
size_t ws = m_channelData[0]->inbuf->getWriteSpace();
if (samples > ws) {
m_log.log(0, "R3Stretcher::process: WARNING: Forced to increase input buffer size. Either setMaxProcessSize was not properly called or process is being called repeatedly without retrieve. Write space and samples", ws, samples);
size_t newSize = m_channelData[0]->inbuf->getSize() - ws + samples;
for (int c = 0; c < m_parameters.channels; ++c) {
auto newBuf = m_channelData[c]->inbuf->resized(newSize);
m_channelData[c]->inbuf = std::unique_ptr<RingBuffer<float>>(newBuf);
}
bool resamplingBefore = false;
areWeResampling(&resamplingBefore, nullptr);
int channels = m_parameters.channels;
int inputIx = 0;
while (inputIx < int(samples)) {
int remaining = int(samples) - inputIx;
int ws = m_channelData[0]->inbuf->getWriteSpace();
if (ws == 0) {
consume();
ws = m_channelData[0]->inbuf->getWriteSpace();
}
if (ws == 0) {
m_log.log(0, "R3Stretcher::process: WARNING: Forced to increase input buffer size. Either setMaxProcessSize was not properly called, process is being called repeatedly without retrieve, or an internal error has led to an incorrect resampler output calculation. Samples to write", remaining);
size_t newSize = m_channelData[0]->inbuf->getSize() + remaining;
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->write(input[c], samples);
auto newBuf = m_channelData[c]->inbuf->resized(newSize);
m_channelData[c]->inbuf =
std::unique_ptr<RingBuffer<float>>(newBuf);
}
continue;
}
if (resamplingBefore) {
for (int c = 0; c < channels; ++c) {
auto &cd = m_channelData.at(c);
m_channelAssembly.resampled[c] = cd->resampled.data();
}
int resampleBufSize = int(m_channelData.at(0)->resampled.size());
int maxResampleOutput = std::min(ws, resampleBufSize);
int maxResampleInput = int(floor(maxResampleOutput * m_pitchScale));
int resampleInput = std::min(remaining, maxResampleInput);
if (resampleInput == 0) resampleInput = 1;
int resampleOutput = m_resampler->resample
(m_channelAssembly.resampled.data(),
maxResampleOutput,
input,
resampleInput,
1.0 / m_pitchScale,
final);
inputIx += resampleInput;
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->write
(m_channelData.at(c)->resampled.data(),
resampleOutput);
}
} else {
int toWrite = std::min(ws, remaining);
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->write (input[c] + inputIx, toWrite);
}
inputIx += toWrite;
}
consume();
}
}
int
@@ -615,6 +743,8 @@ R3Stretcher::available() const
size_t
R3Stretcher::retrieve(float *const *output, size_t samples) const
{
Profiler profiler("R3Stretcher::retrieve");
int got = samples;
for (int c = 0; c < m_parameters.channels; ++c) {
@@ -633,10 +763,15 @@ R3Stretcher::retrieve(float *const *output, size_t samples) const
void
R3Stretcher::consume()
{
Profiler profiler("R3Stretcher::consume");
int longest = m_guideConfiguration.longestFftSize;
int channels = m_parameters.channels;
int inhop = m_inhop;
bool resamplingAfter = false;
areWeResampling(nullptr, &resamplingAfter);
double effectivePitchRatio = 1.0 / m_pitchScale;
if (m_resampler) {
effectivePitchRatio =
@@ -680,6 +815,8 @@ R3Stretcher::consume()
while (cd0->outbuf->getWriteSpace() >= outhop) {
Profiler profiler("R3Stretcher::consume/loop");
// NB our ChannelData, ScaleData, and ChannelScaleData maps
// contain shared_ptrs; whenever we retain one of them in a
// variable, we do so by reference to avoid copying the
@@ -687,7 +824,7 @@ R3Stretcher::consume()
// the map iterators
int readSpace = cd0->inbuf->getReadSpace();
if (readSpace < longest) {
if (readSpace < getWindowSourceSize()) {
if (m_mode == ProcessMode::Finished) {
if (readSpace == 0) {
int fill = cd0->scales.at(longest)->accumulatorFill;
@@ -745,17 +882,8 @@ R3Stretcher::consume()
// Resample
bool resampling = false;
if (m_resampler) {
if (m_pitchScale != 1.0 ||
(m_parameters.options &
RubberBandStretcher::OptionPitchHighConsistency)) {
resampling = true;
}
}
int resampledCount = 0;
if (resampling) {
if (resamplingAfter) {
for (int c = 0; c < channels; ++c) {
auto &cd = m_channelData.at(c);
m_channelAssembly.mixdown[c] = cd->mixdown.data();
@@ -773,7 +901,7 @@ R3Stretcher::consume()
// Emit
int writeCount = outhop;
if (resampling) {
if (resamplingAfter) {
writeCount = resampledCount;
}
if (!isRealTime()) {
@@ -798,7 +926,7 @@ R3Stretcher::consume()
for (int c = 0; c < channels; ++c) {
auto &cd = m_channelData.at(c);
if (resampling) {
if (resamplingAfter) {
cd->outbuf->write(cd->resampled.data(), writeCount);
} else {
cd->outbuf->write(cd->mixdown.data(), writeCount);
@@ -827,62 +955,69 @@ R3Stretcher::consume()
void
R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
{
Profiler profiler("R3Stretcher::analyseChannel");
auto &cd = m_channelData.at(c);
int sourceSize = cd->windowSource.size();
process_t *buf = cd->windowSource.data();
int readSpace = cd->inbuf->getReadSpace();
if (readSpace < sourceSize) {
cd->inbuf->peek(buf, readSpace);
v_zero(buf + readSpace, sourceSize - readSpace);
} else {
cd->inbuf->peek(buf, sourceSize);
}
// We have an unwindowed time-domain frame in buf that is as long
// as required for the union of all FFT sizes and readahead
// hops. Populate the various sizes from it with aligned centres,
// windowing as we copy. The classification scale is handled
// separately because it has readahead, so skip it here. (In
// single-window mode that means we do nothing here, since the
// classification scale is the only one.)
int longest = m_guideConfiguration.longestFftSize;
int classify = m_guideConfiguration.classificationFftSize;
auto &cd = m_channelData.at(c);
process_t *buf = cd->scales.at(longest)->timeDomain.data();
int readSpace = cd->inbuf->getReadSpace();
if (readSpace < longest) {
cd->inbuf->peek(buf, readSpace);
v_zero(buf + readSpace, longest - readSpace);
} else {
cd->inbuf->peek(buf, longest);
}
// We have a single unwindowed frame at the longest FFT size
// ("scale"). Populate the shorter FFT sizes from the centre of
// it, windowing as we copy. The classification scale is handled
// separately because it has readahead, so skip it here as well as
// the longest. (In practice this means we are probably only
// populating one scale)
for (auto &it: cd->scales) {
int fftSize = it.first;
if (fftSize == classify || fftSize == longest) continue;
if (fftSize == classify) continue;
int offset = (longest - fftSize) / 2;
m_scaleData.at(fftSize)->analysisWindow.cut
(buf + offset, it.second->timeDomain.data());
}
// The classification scale has a one-hop readahead, so populate
// the readahead from further down the long unwindowed frame.
auto &classifyScale = cd->scales.at(classify);
ClassificationReadaheadData &readahead = cd->readahead;
bool copyFromReadahead = false;
if (m_useReadahead) {
// The classification scale has a one-hop readahead, so
// populate the readahead from further down the long
// unwindowed frame.
m_scaleData.at(classify)->analysisWindow.cut
(buf + (longest - classify) / 2 + inhop,
readahead.timeDomain.data());
// If inhop has changed since the previous frame, we'll have to
// populate the classification scale (but for analysis/resynthesis
// rather than classification) anew rather than reuse the previous
// readahead. Pity...
// If inhop has changed since the previous frame, we must
// populate the classification scale (but for
// analysis/resynthesis rather than classification) anew
// rather than reuse the previous frame's readahead.
bool haveValidReadahead = cd->haveReadahead;
if (inhop != prevInhop) haveValidReadahead = false;
copyFromReadahead = cd->haveReadahead;
if (inhop != prevInhop) copyFromReadahead = false;
}
if (!haveValidReadahead) {
if (!copyFromReadahead) {
m_scaleData.at(classify)->analysisWindow.cut
(buf + (longest - classify) / 2,
classifyScale->timeDomain.data());
}
// Finally window the longest scale
m_scaleData.at(longest)->analysisWindow.cut(buf);
// FFT shift, forward FFT, and carry out cartesian-polar
// conversion for each FFT size.
@@ -892,7 +1027,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
// where the inhop has changed as above, in which case we need to
// do both readahead and current)
if (haveValidReadahead) {
if (m_useReadahead) {
if (copyFromReadahead) {
v_copy(classifyScale->mag.data(),
readahead.mag.data(),
classifyScale->bufSize);
@@ -906,14 +1043,14 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
classifyScale->real.data(),
classifyScale->imag.data());
for (const auto &b : m_guideConfiguration.fftBandLimits) {
if (b.fftSize == classify) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
if (band.fftSize == classify) {
ToPolarSpec spec;
spec.magFromBin = 0;
spec.magBinCount = classify/2 + 1;
spec.polarFromBin = b.b0min;
spec.polarBinCount = b.b1max - b.b0min + 1;
spec.polarFromBin = band.b0min;
spec.polarBinCount = band.b1max - band.b0min + 1;
convertToPolar(readahead.mag.data(),
readahead.phase.data(),
classifyScale->real.data(),
@@ -928,15 +1065,16 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
}
cd->haveReadahead = true;
}
// For the others (and the classify as well, if the inhop has
// changed or we haven't filled the readahead yet) we operate
// directly in the scale data and restrict the range for
// cartesian-polar conversion
// changed or we aren't using readahead or haven't filled the
// readahead yet) we operate directly in the scale data and
// restrict the range for cartesian-polar conversion
for (auto &it: cd->scales) {
int fftSize = it.first;
if (fftSize == classify && haveValidReadahead) {
if (fftSize == classify && copyFromReadahead) {
continue;
}
@@ -948,8 +1086,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
scale->real.data(),
scale->imag.data());
for (const auto &b : m_guideConfiguration.fftBandLimits) {
if (b.fftSize == fftSize) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
if (band.fftSize == fftSize) {
ToPolarSpec spec;
@@ -957,17 +1096,17 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
// range, as all the magnitudes (though not
// necessarily all phases) are potentially relevant to
// classification and formant analysis. But this case
// here only happens if we don't haveValidReadahead -
// the normal case is above and just copies from the
// here only happens if we don't copyFromReadahead -
// the normal case is above and, er, copies from the
// previous readahead.
if (fftSize == classify) {
spec.magFromBin = 0;
spec.magBinCount = classify/2 + 1;
spec.polarFromBin = b.b0min;
spec.polarBinCount = b.b1max - b.b0min + 1;
spec.polarFromBin = band.b0min;
spec.polarBinCount = band.b1max - band.b0min + 1;
} else {
spec.magFromBin = b.b0min;
spec.magBinCount = b.b1max - b.b0min + 1;
spec.magFromBin = band.b0min;
spec.magBinCount = band.b1max - band.b0min + 1;
spec.polarFromBin = spec.magFromBin;
spec.polarBinCount = spec.magBinCount;
}
@@ -997,8 +1136,14 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
v_copy(cd->classification.data(), cd->nextClassification.data(),
cd->classification.size());
if (m_useReadahead) {
cd->classifier->classify(readahead.mag.data(),
cd->nextClassification.data());
} else {
cd->classifier->classify(classifyScale->mag.data(),
cd->nextClassification.data());
}
cd->prevSegmentation = cd->segmentation;
cd->segmentation = cd->nextSegmentation;
@@ -1038,6 +1183,9 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
bool tighterChannelLock =
m_parameters.options & RubberBandStretcher::OptionChannelsTogether;
double magMean = v_mean(classifyScale->mag.data() + 1, classify/2);
if (m_useReadahead) {
m_guide.updateGuidance(ratio,
prevOuthop,
classifyScale->mag.data(),
@@ -1046,11 +1194,27 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
cd->segmentation,
cd->prevSegmentation,
cd->nextSegmentation,
v_mean(classifyScale->mag.data() + 1, classify/2),
magMean,
m_unityCount,
isRealTime(),
tighterChannelLock,
cd->guidance);
} else {
m_guide.updateGuidance(ratio,
prevOuthop,
classifyScale->prevMag.data(),
classifyScale->prevMag.data(),
classifyScale->mag.data(),
cd->segmentation,
cd->prevSegmentation,
cd->nextSegmentation,
magMean,
m_unityCount,
isRealTime(),
tighterChannelLock,
cd->guidance);
}
/*
if (c == 0) {
if (cd->guidance.kick.present) {
@@ -1067,6 +1231,8 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
void
R3Stretcher::analyseFormant(int c)
{
Profiler profiler("R3Stretcher::analyseFormant");
auto &cd = m_channelData.at(c);
auto &f = *cd->formant;
@@ -1101,6 +1267,8 @@ R3Stretcher::analyseFormant(int c)
void
R3Stretcher::adjustFormant(int c)
{
Profiler profiler("R3Stretcher::adjustFormant");
auto &cd = m_channelData.at(c);
for (auto &it : cd->scales) {
@@ -1116,9 +1284,10 @@ R3Stretcher::adjustFormant(int c)
process_t maxRatio = 60.0;
process_t minRatio = 1.0 / maxRatio;
for (const auto &b : m_guideConfiguration.fftBandLimits) {
if (b.fftSize != fftSize) continue;
for (int i = b.b0min; i < b.b1max && i < highBin; ++i) {
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
const auto &band = m_guideConfiguration.fftBandLimits[b];
if (band.fftSize != fftSize) continue;
for (int i = band.b0min; i < band.b1max && i < highBin; ++i) {
process_t source = cd->formant->envelopeAt(i * sourceFactor);
process_t target = cd->formant->envelopeAt(i * targetFactor);
if (target > 0.0) {
@@ -1135,6 +1304,10 @@ R3Stretcher::adjustFormant(int c)
void
R3Stretcher::adjustPreKick(int c)
{
if (isSingleWindowed()) return;
Profiler profiler("R3Stretcher::adjustPreKick");
auto &cd = m_channelData.at(c);
auto fftSize = cd->guidance.fftBands[0].fftSize;
if (cd->guidance.preKick.present) {
@@ -1166,12 +1339,17 @@ R3Stretcher::adjustPreKick(int c)
void
R3Stretcher::synthesiseChannel(int c, int outhop, bool draining)
{
Profiler profiler("R3Stretcher::synthesiseChannel");
int longest = m_guideConfiguration.longestFftSize;
auto &cd = m_channelData.at(c);
for (const auto &band : cd->guidance.fftBands) {
for (int b = 0; b < cd->guidance.fftBandCount; ++b) {
const auto &band = cd->guidance.fftBands[b];
int fftSize = band.fftSize;
auto &scale = cd->scales.at(fftSize);
auto &scaleData = m_scaleData.at(fftSize);

View File

@@ -103,6 +103,28 @@ public:
}
protected:
// Bounds on the input and output hop sizes, selected once from the
// stretcher options at construction. One set of values is used by
// default and another when OptionWindowShort is requested.
struct Limits {
    int minPreferredOuthop;
    int maxPreferredOuthop;
    int minInhop;
    int maxInhopWithReadahead;
    int maxInhop;

    Limits(RubberBandStretcher::Options options) :
        minInhop(1)     // the same in both configurations
    {
        if (options & RubberBandStretcher::OptionWindowShort) {
            // See note in calculateHop
            minPreferredOuthop = 256;
            maxPreferredOuthop = 640;
            maxInhopWithReadahead = 512;
            maxInhop = 1560;
        } else {
            minPreferredOuthop = 128;
            maxPreferredOuthop = 512;
            maxInhopWithReadahead = 1024;
            maxInhop = 1024;
        }
    }
};
struct ClassificationReadaheadData {
FixedVector<process_t> timeDomain;
FixedVector<process_t> mag;
@@ -185,6 +207,7 @@ protected:
struct ChannelData {
std::map<int, std::shared_ptr<ChannelScaleData>> scales;
FixedVector<process_t> windowSource;
ClassificationReadaheadData readahead;
bool haveReadahead;
std::unique_ptr<BinClassifier> classifier;
@@ -203,9 +226,11 @@ protected:
ChannelData(BinSegmenter::Parameters segmenterParameters,
BinClassifier::Parameters classifierParameters,
int longestFftSize,
int windowSourceSize,
int inRingBufferSize,
int outRingBufferSize) :
scales(),
windowSource(windowSourceSize, 0.0),
readahead(segmenterParameters.fftSize),
haveReadahead(false),
classifier(new BinClassifier(classifierParameters)),
@@ -215,7 +240,7 @@ protected:
BinClassifier::Classification::Residual),
segmenter(new BinSegmenter(segmenterParameters)),
segmentation(), prevSegmentation(), nextSegmentation(),
mixdown(longestFftSize, 0.f), // though it could be shorter
mixdown(longestFftSize, 0.f),
resampled(outRingBufferSize, 0.f),
inbuf(new RingBuffer<float>(inRingBufferSize)),
outbuf(new RingBuffer<float>(outRingBufferSize)),
@@ -253,19 +278,22 @@ protected:
struct ScaleData {
int fftSize;
bool singleWindowMode;
FFT fft;
Window<process_t> analysisWindow;
Window<process_t> synthesisWindow;
process_t windowScaleFactor;
GuidedPhaseAdvance guided;
ScaleData(GuidedPhaseAdvance::Parameters guidedParameters,
Log log) :
fftSize(guidedParameters.fftSize),
singleWindowMode(guidedParameters.singleWindowMode),
fft(fftSize),
analysisWindow(analysisWindowShape(fftSize),
analysisWindowLength(fftSize)),
synthesisWindow(synthesisWindowShape(fftSize),
synthesisWindowLength(fftSize)),
analysisWindow(analysisWindowShape(),
analysisWindowLength()),
synthesisWindow(synthesisWindowShape(),
synthesisWindowLength()),
windowScaleFactor(0.0),
guided(guidedParameters, log)
{
@@ -277,13 +305,14 @@ protected:
}
}
WindowType analysisWindowShape(int fftSize);
int analysisWindowLength(int fftSize);
WindowType synthesisWindowShape(int fftSize);
int synthesisWindowLength(int fftSize);
WindowType analysisWindowShape();
int analysisWindowLength();
WindowType synthesisWindowShape();
int synthesisWindowLength();
};
Parameters m_parameters;
const Limits m_limits;
Log m_log;
std::atomic<double> m_timeRatio;
@@ -297,6 +326,7 @@ protected:
ChannelAssembly m_channelAssembly;
std::unique_ptr<StretchCalculator> m_calculator;
std::unique_ptr<Resampler> m_resampler;
bool m_useReadahead;
std::atomic<int> m_inhop;
int m_prevInhop;
int m_prevOuthop;
@@ -367,6 +397,44 @@ protected:
return m_parameters.options &
RubberBandStretcher::OptionProcessRealTime;
}
// Report at which stage, if any, the resampler is applied.
//
// *before is set true when the input is to be resampled on its way
// into the input buffer (see process); *after is set true when the
// output is to be resampled before it is written to the output
// buffer (see consume). At most one of the two is ever set true.
// Either pointer may be null if the caller does not need that answer.
void areWeResampling(bool *before, bool *after) const {

    // Default answer: no resampling at either stage
    if (before) *before = false;
    if (after) *after = false;

    // Nothing to decide if there is no resampler at all
    if (!m_resampler) return;

    // High-consistency mode always resamples afterwards, whatever
    // the pitch scale happens to be
    if (m_parameters.options &
        RubberBandStretcher::OptionPitchHighConsistency) {
        if (after) *after = true;
        return;
    }

    // Otherwise a unity pitch scale needs no resampling
    if (m_pitchScale == 1.0) return;

    // Downward shifts resample afterwards; upward shifts do so only
    // when high-quality pitch shifting is requested. Every other
    // non-unity case resamples beforehand.
    const bool highQuality =
        m_parameters.options &
        RubberBandStretcher::OptionPitchHighQuality;
    const bool resampleAfter =
        (m_pitchScale < 1.0) ||
        (m_pitchScale > 1.0 && highQuality);

    bool *const chosen = resampleAfter ? after : before;
    if (chosen) *chosen = true;
}
// True if OptionWindowShort is set in the stretcher options.
bool isSingleWindowed() const {
    const bool shortWindowRequested =
        m_parameters.options & RubberBandStretcher::OptionWindowShort;
    return shortWindowRequested;
}
// Return the larger of the longest FFT size and the classification
// FFT size extended by the maximum input hop permitted with
// readahead.
int getWindowSourceSize() const {
    const int longest = m_guideConfiguration.longestFftSize;
    const int classifyPlusReadahead =
        m_guideConfiguration.classificationFftSize +
        m_limits.maxInhopWithReadahead;
    return (longest > classifyPlusReadahead)
        ? longest
        : classifyPlusReadahead;
}
};
}