First cut at formant preservation
This commit is contained in:
@@ -41,7 +41,8 @@ RubberBandStretcher::RubberBandStretcher(size_t sampleRate,
|
|||||||
: nullptr),
|
: nullptr),
|
||||||
m_r3d
|
m_r3d
|
||||||
((options & OptionEngineFiner) ?
|
((options & OptionEngineFiner) ?
|
||||||
new R3StretcherImpl(R3StretcherImpl::Parameters(sampleRate, channels),
|
new R3StretcherImpl(R3StretcherImpl::Parameters
|
||||||
|
(sampleRate, channels, options),
|
||||||
initialTimeRatio, initialPitchScale)
|
initialTimeRatio, initialPitchScale)
|
||||||
: nullptr)
|
: nullptr)
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -50,23 +50,18 @@ public:
|
|||||||
fftSize(_fftSize), sampleRate(_sampleRate) { }
|
fftSize(_fftSize), sampleRate(_sampleRate) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
BinSegmenter(Parameters parameters,
|
BinSegmenter(Parameters parameters) :
|
||||||
BinClassifier::Parameters classifierParameters) :
|
|
||||||
m_parameters(parameters),
|
m_parameters(parameters),
|
||||||
m_classifierParameters(classifierParameters),
|
m_binCount(m_parameters.fftSize/2 + 1),
|
||||||
m_classifier(classifierParameters),
|
m_numeric(m_binCount, 0),
|
||||||
m_classification(classifierParameters.binCount,
|
m_classFilter(m_binCount / 64)
|
||||||
BinClassifier::Classification::Silent),
|
|
||||||
m_numeric(classifierParameters.binCount, 0),
|
|
||||||
m_classFilter(classifierParameters.binCount / 64)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
Segmentation segment(const double *const mag) {
|
Segmentation segment(const BinClassifier::Classification *classification) {
|
||||||
int n = m_classifierParameters.binCount;
|
int n = m_binCount;
|
||||||
m_classifier.classify(mag, m_classification.data());
|
|
||||||
for (int i = 0; i < n; ++i) {
|
for (int i = 0; i < n; ++i) {
|
||||||
switch (m_classification[i]) {
|
switch (classification[i]) {
|
||||||
case BinClassifier::Classification::Harmonic:
|
case BinClassifier::Classification::Harmonic:
|
||||||
m_numeric[i] = 0; break;
|
m_numeric[i] = 0; break;
|
||||||
case BinClassifier::Classification::Percussive:
|
case BinClassifier::Classification::Percussive:
|
||||||
@@ -108,9 +103,7 @@ public:
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
Parameters m_parameters;
|
Parameters m_parameters;
|
||||||
BinClassifier::Parameters m_classifierParameters;
|
int m_binCount;
|
||||||
BinClassifier m_classifier;
|
|
||||||
std::vector<BinClassifier::Classification> m_classification;
|
|
||||||
std::vector<int> m_numeric;
|
std::vector<int> m_numeric;
|
||||||
MovingMedian<int> m_classFilter;
|
MovingMedian<int> m_classFilter;
|
||||||
|
|
||||||
|
|||||||
@@ -144,6 +144,7 @@ public:
|
|||||||
const BinSegmenter::Segmentation &segmentation,
|
const BinSegmenter::Segmentation &segmentation,
|
||||||
const BinSegmenter::Segmentation &prevSegmentation,
|
const BinSegmenter::Segmentation &prevSegmentation,
|
||||||
const BinSegmenter::Segmentation &nextSegmentation,
|
const BinSegmenter::Segmentation &nextSegmentation,
|
||||||
|
bool specialCaseUnity,
|
||||||
Guidance &guidance) const {
|
Guidance &guidance) const {
|
||||||
|
|
||||||
guidance.kick.present = false;
|
guidance.kick.present = false;
|
||||||
@@ -157,7 +158,7 @@ public:
|
|||||||
guidance.fftBands[1].fftSize = roundUp(int(ceil(nyquist/16.0)));
|
guidance.fftBands[1].fftSize = roundUp(int(ceil(nyquist/16.0)));
|
||||||
guidance.fftBands[2].fftSize = roundUp(int(ceil(nyquist/32.0)));
|
guidance.fftBands[2].fftSize = roundUp(int(ceil(nyquist/32.0)));
|
||||||
|
|
||||||
if (fabs(ratio - 1.0) < 1.0e-6) {
|
if (specialCaseUnity && (fabs(ratio - 1.0) < 1.0e-6)) {
|
||||||
guidance.fftBands[0].f0 = 0.0;
|
guidance.fftBands[0].f0 = 0.0;
|
||||||
guidance.fftBands[0].f1 = 0.0;
|
guidance.fftBands[0].f1 = 0.0;
|
||||||
guidance.fftBands[1].f0 = 0.0;
|
guidance.fftBands[1].f0 = 0.0;
|
||||||
|
|||||||
@@ -89,6 +89,9 @@ R3StretcherImpl::R3StretcherImpl(Parameters parameters,
|
|||||||
m_resampler = std::unique_ptr<Resampler>
|
m_resampler = std::unique_ptr<Resampler>
|
||||||
(new Resampler(resamplerParameters, m_parameters.channels));
|
(new Resampler(resamplerParameters, m_parameters.channels));
|
||||||
|
|
||||||
|
m_formant = std::unique_ptr<FormantData>
|
||||||
|
(new FormantData(m_guideConfiguration.classificationFftSize));
|
||||||
|
|
||||||
calculateHop();
|
calculateHop();
|
||||||
|
|
||||||
m_prevInhop = m_inhop;
|
m_prevInhop = m_inhop;
|
||||||
@@ -146,6 +149,27 @@ R3StretcherImpl::setPitchScale(double scale)
|
|||||||
calculateHop();
|
calculateHop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
R3StretcherImpl::setFormantOption(RubberBandStretcher::Options options)
|
||||||
|
{
|
||||||
|
int mask = (RubberBandStretcher::OptionFormantShifted |
|
||||||
|
RubberBandStretcher::OptionFormantPreserved);
|
||||||
|
m_parameters.options &= ~mask;
|
||||||
|
options &= mask;
|
||||||
|
m_parameters.options |= options;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
R3StretcherImpl::setPitchOption(RubberBandStretcher::Options options)
|
||||||
|
{
|
||||||
|
int mask = (RubberBandStretcher::OptionPitchHighQuality |
|
||||||
|
RubberBandStretcher::OptionPitchHighSpeed |
|
||||||
|
RubberBandStretcher::OptionPitchHighConsistency);
|
||||||
|
m_parameters.options &= ~mask;
|
||||||
|
options &= mask;
|
||||||
|
m_parameters.options |= options;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
R3StretcherImpl::calculateHop()
|
R3StretcherImpl::calculateHop()
|
||||||
{
|
{
|
||||||
@@ -353,6 +377,10 @@ R3StretcherImpl::consume()
|
|||||||
analyseChannel(c, inhop, m_prevInhop, m_prevOuthop);
|
analyseChannel(c, inhop, m_prevInhop, m_prevOuthop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (m_parameters.options & RubberBandStretcher::OptionFormantPreserved) {
|
||||||
|
analyseFormant();
|
||||||
|
}
|
||||||
|
|
||||||
// Phase update. This is synchronised across all channels
|
// Phase update. This is synchronised across all channels
|
||||||
|
|
||||||
for (auto &it : m_channelData[0]->scales) {
|
for (auto &it : m_channelData[0]->scales) {
|
||||||
@@ -538,8 +566,9 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
|
|
||||||
cd->haveReadahead = true;
|
cd->haveReadahead = true;
|
||||||
|
|
||||||
// For the others (and the classify if the inhop has changed) we
|
// For the others (and the classify as well, if the inhop has
|
||||||
// operate directly in the scale data and restrict the range for
|
// changed or we haven't filled the readahead yet) we operate
|
||||||
|
// directly in the scale data and restrict the range for
|
||||||
// cartesian-polar conversion
|
// cartesian-polar conversion
|
||||||
|
|
||||||
for (auto &it: cd->scales) {
|
for (auto &it: cd->scales) {
|
||||||
@@ -571,15 +600,26 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
|
|
||||||
// Use the classification scale to get a bin segmentation and
|
// Use the classification scale to get a bin segmentation and
|
||||||
// calculate the adaptive frequency guide for this channel
|
// calculate the adaptive frequency guide for this channel
|
||||||
|
|
||||||
|
v_copy(cd->classification.data(), cd->nextClassification.data(),
|
||||||
|
cd->classification.size());
|
||||||
|
cd->classifier->classify(readahead.mag.data(),
|
||||||
|
cd->nextClassification.data());
|
||||||
|
|
||||||
cd->prevSegmentation = cd->segmentation;
|
cd->prevSegmentation = cd->segmentation;
|
||||||
cd->segmentation = cd->nextSegmentation;
|
cd->segmentation = cd->nextSegmentation;
|
||||||
cd->nextSegmentation = cd->segmenter->segment(readahead.mag.data());
|
cd->nextSegmentation = cd->segmenter->segment(cd->nextClassification.data());
|
||||||
|
|
||||||
m_troughPicker.findNearestAndNextPeaks
|
m_troughPicker.findNearestAndNextPeaks
|
||||||
(classifyScale->mag.data(), 3, nullptr,
|
(classifyScale->mag.data(), 3, nullptr,
|
||||||
classifyScale->troughs.data());
|
classifyScale->troughs.data());
|
||||||
|
|
||||||
double instantaneousRatio = double(prevOuthop) / double(prevInhop);
|
double instantaneousRatio = double(prevOuthop) / double(prevInhop);
|
||||||
|
//!!!??? bool specialCaseUnity = !(m_parameters.options &
|
||||||
|
// RubberBandStretcher::OptionPitchHighConsistency);
|
||||||
|
|
||||||
|
bool specialCaseUnity = true;
|
||||||
|
|
||||||
m_guide.calculate(instantaneousRatio,
|
m_guide.calculate(instantaneousRatio,
|
||||||
classifyScale->mag.data(),
|
classifyScale->mag.data(),
|
||||||
classifyScale->troughs.data(),
|
classifyScale->troughs.data(),
|
||||||
@@ -587,9 +627,70 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
cd->segmentation,
|
cd->segmentation,
|
||||||
cd->prevSegmentation,
|
cd->prevSegmentation,
|
||||||
cd->nextSegmentation,
|
cd->nextSegmentation,
|
||||||
|
specialCaseUnity,
|
||||||
cd->guidance);
|
cd->guidance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
R3StretcherImpl::analyseFormant()
|
||||||
|
{
|
||||||
|
int classify = m_guideConfiguration.classificationFftSize;
|
||||||
|
int binCount = classify/2 + 1;
|
||||||
|
int channels = m_parameters.channels;
|
||||||
|
|
||||||
|
auto &f = *m_formant;
|
||||||
|
|
||||||
|
v_zero(f.envelope.data(), binCount);
|
||||||
|
|
||||||
|
for (int c = 0; c < channels; ++c) {
|
||||||
|
auto &cd = m_channelData.at(c);
|
||||||
|
auto &scale = cd->scales.at(classify);
|
||||||
|
for (int i = 0; i < binCount; ++i) {
|
||||||
|
f.envelope.at(i) += scale->mag.at(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
m_scaleData.at(classify)->fft.inverseCepstral
|
||||||
|
(f.envelope.data(), f.cepstra.data());
|
||||||
|
|
||||||
|
int cutoff = int(floor(m_parameters.sampleRate / 700.0));
|
||||||
|
if (cutoff < 1) cutoff = 1;
|
||||||
|
|
||||||
|
f.cepstra[0] /= 2.0;
|
||||||
|
f.cepstra[cutoff-1] /= 2.0;
|
||||||
|
for (int i = cutoff; i < classify; ++i) {
|
||||||
|
f.cepstra[i] = 0.0;
|
||||||
|
}
|
||||||
|
v_scale(f.cepstra.data(), 1.0 / double(classify), cutoff);
|
||||||
|
|
||||||
|
m_scaleData.at(classify)->fft.forward
|
||||||
|
(f.cepstra.data(), f.envelope.data(),
|
||||||
|
f.shifted.data()); // shifted is just a spare for this one
|
||||||
|
|
||||||
|
v_exp(f.envelope.data(), binCount);
|
||||||
|
|
||||||
|
for (int i = 0; i < binCount; ++i) {
|
||||||
|
if (f.envelope[i] > 1.0e10) f.envelope[i] = 1.0e10;
|
||||||
|
}
|
||||||
|
|
||||||
|
double scale = m_pitchScale;
|
||||||
|
for (int target = 0; target < binCount; ++target) {
|
||||||
|
int source = int(round(target * scale));
|
||||||
|
if (source >= binCount) {
|
||||||
|
f.shifted[target] = 0.0;
|
||||||
|
} else {
|
||||||
|
f.shifted[target] = f.envelope[source];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "X:";
|
||||||
|
for (int i = 0; i < binCount; ++i) {
|
||||||
|
if (i > 0) std::cout << ",";
|
||||||
|
std::cout << f.shifted[i];
|
||||||
|
}
|
||||||
|
std::cout << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
R3StretcherImpl::synthesiseChannel(int c, int outhop)
|
R3StretcherImpl::synthesiseChannel(int c, int outhop)
|
||||||
{
|
{
|
||||||
@@ -598,12 +699,22 @@ R3StretcherImpl::synthesiseChannel(int c, int outhop)
|
|||||||
auto &cd = m_channelData.at(c);
|
auto &cd = m_channelData.at(c);
|
||||||
|
|
||||||
for (auto &it : cd->scales) {
|
for (auto &it : cd->scales) {
|
||||||
|
|
||||||
auto &scale = it.second;
|
auto &scale = it.second;
|
||||||
int bufSize = scale->bufSize;
|
int bufSize = scale->bufSize;
|
||||||
|
|
||||||
// copy to prevMag before filtering
|
// copy to prevMag before filtering
|
||||||
v_copy(scale->prevMag.data(),
|
v_copy(scale->prevMag.data(),
|
||||||
scale->mag.data(),
|
scale->mag.data(),
|
||||||
bufSize);
|
bufSize);
|
||||||
|
|
||||||
|
// formant shift only the middle register
|
||||||
|
if (m_parameters.options & RubberBandStretcher::OptionFormantPreserved) {
|
||||||
|
if (it.first == m_guideConfiguration.classificationFftSize) {
|
||||||
|
v_divide(scale->mag.data(), m_formant->envelope.data(), bufSize);
|
||||||
|
v_multiply(scale->mag.data(), m_formant->shifted.data(), bufSize);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const auto &band : cd->guidance.fftBands) {
|
for (const auto &band : cd->guidance.fftBands) {
|
||||||
|
|||||||
@@ -36,6 +36,8 @@
|
|||||||
#include "../common/Allocators.h"
|
#include "../common/Allocators.h"
|
||||||
#include "../common/Window.h"
|
#include "../common/Window.h"
|
||||||
|
|
||||||
|
#include "../rubberband/RubberBandStretcher.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
@@ -49,8 +51,10 @@ public:
|
|||||||
struct Parameters {
|
struct Parameters {
|
||||||
double sampleRate;
|
double sampleRate;
|
||||||
int channels;
|
int channels;
|
||||||
|
RubberBandStretcher::Options options;
|
||||||
std::function<void(const std::string &)> logger;
|
std::function<void(const std::string &)> logger;
|
||||||
Parameters(double _sampleRate, int _channels,
|
Parameters(double _sampleRate, int _channels,
|
||||||
|
RubberBandStretcher::Options options,
|
||||||
std::function<void(const std::string &)> _log = &logCout) :
|
std::function<void(const std::string &)> _log = &logCout) :
|
||||||
sampleRate(_sampleRate), channels(_channels), logger(_log) { }
|
sampleRate(_sampleRate), channels(_channels), logger(_log) { }
|
||||||
};
|
};
|
||||||
@@ -68,6 +72,9 @@ public:
|
|||||||
double getTimeRatio() const;
|
double getTimeRatio() const;
|
||||||
double getPitchScale() const;
|
double getPitchScale() const;
|
||||||
|
|
||||||
|
void setFormantOption(RubberBandStretcher::Options);
|
||||||
|
void setPitchOption(RubberBandStretcher::Options);
|
||||||
|
|
||||||
size_t getSamplesRequired() const;
|
size_t getSamplesRequired() const;
|
||||||
void process(const float *const *input, size_t samples, bool final);
|
void process(const float *const *input, size_t samples, bool final);
|
||||||
int available() const;
|
int available() const;
|
||||||
@@ -133,6 +140,9 @@ protected:
|
|||||||
std::map<int, std::shared_ptr<ChannelScaleData>> scales;
|
std::map<int, std::shared_ptr<ChannelScaleData>> scales;
|
||||||
ClassificationReadaheadData readahead;
|
ClassificationReadaheadData readahead;
|
||||||
bool haveReadahead;
|
bool haveReadahead;
|
||||||
|
std::unique_ptr<BinClassifier> classifier;
|
||||||
|
FixedVector<BinClassifier::Classification> classification;
|
||||||
|
FixedVector<BinClassifier::Classification> nextClassification;
|
||||||
std::unique_ptr<BinSegmenter> segmenter;
|
std::unique_ptr<BinSegmenter> segmenter;
|
||||||
BinSegmenter::Segmentation segmentation;
|
BinSegmenter::Segmentation segmentation;
|
||||||
BinSegmenter::Segmentation prevSegmentation;
|
BinSegmenter::Segmentation prevSegmentation;
|
||||||
@@ -150,8 +160,12 @@ protected:
|
|||||||
scales(),
|
scales(),
|
||||||
readahead(segmenterParameters.fftSize),
|
readahead(segmenterParameters.fftSize),
|
||||||
haveReadahead(false),
|
haveReadahead(false),
|
||||||
segmenter(new BinSegmenter(segmenterParameters,
|
classifier(new BinClassifier(classifierParameters)),
|
||||||
classifierParameters)),
|
classification(classifierParameters.binCount,
|
||||||
|
BinClassifier::Classification::Silent),
|
||||||
|
nextClassification(classifierParameters.binCount,
|
||||||
|
BinClassifier::Classification::Silent),
|
||||||
|
segmenter(new BinSegmenter(segmenterParameters)),
|
||||||
segmentation(), prevSegmentation(), nextSegmentation(),
|
segmentation(), prevSegmentation(), nextSegmentation(),
|
||||||
mixdown(longestFftSize, 0.f), // though it could be shorter
|
mixdown(longestFftSize, 0.f), // though it could be shorter
|
||||||
resampled(outRingBufferSize, 0.f),
|
resampled(outRingBufferSize, 0.f),
|
||||||
@@ -206,6 +220,17 @@ protected:
|
|||||||
int synthesisWindowLength(int fftSize);
|
int synthesisWindowLength(int fftSize);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct FormantData {
|
||||||
|
FixedVector<double> cepstra;
|
||||||
|
FixedVector<double> envelope;
|
||||||
|
FixedVector<double> shifted;
|
||||||
|
|
||||||
|
FormantData(int _fftSize) :
|
||||||
|
cepstra(_fftSize, 0.0),
|
||||||
|
envelope(_fftSize, 0.0),
|
||||||
|
shifted(_fftSize, 0.0) { }
|
||||||
|
};
|
||||||
|
|
||||||
Parameters m_parameters;
|
Parameters m_parameters;
|
||||||
|
|
||||||
std::atomic<double> m_timeRatio;
|
std::atomic<double> m_timeRatio;
|
||||||
@@ -219,6 +244,7 @@ protected:
|
|||||||
Peak<double, std::less<double>> m_troughPicker;
|
Peak<double, std::less<double>> m_troughPicker;
|
||||||
std::unique_ptr<StretchCalculator> m_calculator;
|
std::unique_ptr<StretchCalculator> m_calculator;
|
||||||
std::unique_ptr<Resampler> m_resampler;
|
std::unique_ptr<Resampler> m_resampler;
|
||||||
|
std::unique_ptr<FormantData> m_formant;
|
||||||
std::atomic<int> m_inhop;
|
std::atomic<int> m_inhop;
|
||||||
int m_prevInhop;
|
int m_prevInhop;
|
||||||
int m_prevOuthop;
|
int m_prevOuthop;
|
||||||
@@ -227,6 +253,7 @@ protected:
|
|||||||
void consume();
|
void consume();
|
||||||
void calculateHop();
|
void calculateHop();
|
||||||
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);
|
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);
|
||||||
|
void analyseFormant();
|
||||||
void synthesiseChannel(int channel, int outhop);
|
void synthesiseChannel(int channel, int outhop);
|
||||||
|
|
||||||
double getEffectiveRatio() const {
|
double getEffectiveRatio() const {
|
||||||
|
|||||||
Reference in New Issue
Block a user