First cut at formant preservation
This commit is contained in:
@@ -41,7 +41,8 @@ RubberBandStretcher::RubberBandStretcher(size_t sampleRate,
|
||||
: nullptr),
|
||||
m_r3d
|
||||
((options & OptionEngineFiner) ?
|
||||
new R3StretcherImpl(R3StretcherImpl::Parameters(sampleRate, channels),
|
||||
new R3StretcherImpl(R3StretcherImpl::Parameters
|
||||
(sampleRate, channels, options),
|
||||
initialTimeRatio, initialPitchScale)
|
||||
: nullptr)
|
||||
{
|
||||
|
||||
@@ -50,23 +50,18 @@ public:
|
||||
fftSize(_fftSize), sampleRate(_sampleRate) { }
|
||||
};
|
||||
|
||||
BinSegmenter(Parameters parameters,
|
||||
BinClassifier::Parameters classifierParameters) :
|
||||
BinSegmenter(Parameters parameters) :
|
||||
m_parameters(parameters),
|
||||
m_classifierParameters(classifierParameters),
|
||||
m_classifier(classifierParameters),
|
||||
m_classification(classifierParameters.binCount,
|
||||
BinClassifier::Classification::Silent),
|
||||
m_numeric(classifierParameters.binCount, 0),
|
||||
m_classFilter(classifierParameters.binCount / 64)
|
||||
m_binCount(m_parameters.fftSize/2 + 1),
|
||||
m_numeric(m_binCount, 0),
|
||||
m_classFilter(m_binCount / 64)
|
||||
{
|
||||
}
|
||||
|
||||
Segmentation segment(const double *const mag) {
|
||||
int n = m_classifierParameters.binCount;
|
||||
m_classifier.classify(mag, m_classification.data());
|
||||
Segmentation segment(const BinClassifier::Classification *classification) {
|
||||
int n = m_binCount;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
switch (m_classification[i]) {
|
||||
switch (classification[i]) {
|
||||
case BinClassifier::Classification::Harmonic:
|
||||
m_numeric[i] = 0; break;
|
||||
case BinClassifier::Classification::Percussive:
|
||||
@@ -108,9 +103,7 @@ public:
|
||||
|
||||
protected:
|
||||
Parameters m_parameters;
|
||||
BinClassifier::Parameters m_classifierParameters;
|
||||
BinClassifier m_classifier;
|
||||
std::vector<BinClassifier::Classification> m_classification;
|
||||
int m_binCount;
|
||||
std::vector<int> m_numeric;
|
||||
MovingMedian<int> m_classFilter;
|
||||
|
||||
|
||||
@@ -144,6 +144,7 @@ public:
|
||||
const BinSegmenter::Segmentation &segmentation,
|
||||
const BinSegmenter::Segmentation &prevSegmentation,
|
||||
const BinSegmenter::Segmentation &nextSegmentation,
|
||||
bool specialCaseUnity,
|
||||
Guidance &guidance) const {
|
||||
|
||||
guidance.kick.present = false;
|
||||
@@ -157,7 +158,7 @@ public:
|
||||
guidance.fftBands[1].fftSize = roundUp(int(ceil(nyquist/16.0)));
|
||||
guidance.fftBands[2].fftSize = roundUp(int(ceil(nyquist/32.0)));
|
||||
|
||||
if (fabs(ratio - 1.0) < 1.0e-6) {
|
||||
if (specialCaseUnity && (fabs(ratio - 1.0) < 1.0e-6)) {
|
||||
guidance.fftBands[0].f0 = 0.0;
|
||||
guidance.fftBands[0].f1 = 0.0;
|
||||
guidance.fftBands[1].f0 = 0.0;
|
||||
|
||||
@@ -88,7 +88,10 @@ R3StretcherImpl::R3StretcherImpl(Parameters parameters,
|
||||
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; //!!!???
|
||||
m_resampler = std::unique_ptr<Resampler>
|
||||
(new Resampler(resamplerParameters, m_parameters.channels));
|
||||
|
||||
|
||||
m_formant = std::unique_ptr<FormantData>
|
||||
(new FormantData(m_guideConfiguration.classificationFftSize));
|
||||
|
||||
calculateHop();
|
||||
|
||||
m_prevInhop = m_inhop;
|
||||
@@ -146,6 +149,27 @@ R3StretcherImpl::setPitchScale(double scale)
|
||||
calculateHop();
|
||||
}
|
||||
|
||||
void
|
||||
R3StretcherImpl::setFormantOption(RubberBandStretcher::Options options)
|
||||
{
|
||||
int mask = (RubberBandStretcher::OptionFormantShifted |
|
||||
RubberBandStretcher::OptionFormantPreserved);
|
||||
m_parameters.options &= ~mask;
|
||||
options &= mask;
|
||||
m_parameters.options |= options;
|
||||
}
|
||||
|
||||
void
|
||||
R3StretcherImpl::setPitchOption(RubberBandStretcher::Options options)
|
||||
{
|
||||
int mask = (RubberBandStretcher::OptionPitchHighQuality |
|
||||
RubberBandStretcher::OptionPitchHighSpeed |
|
||||
RubberBandStretcher::OptionPitchHighConsistency);
|
||||
m_parameters.options &= ~mask;
|
||||
options &= mask;
|
||||
m_parameters.options |= options;
|
||||
}
|
||||
|
||||
void
|
||||
R3StretcherImpl::calculateHop()
|
||||
{
|
||||
@@ -353,6 +377,10 @@ R3StretcherImpl::consume()
|
||||
analyseChannel(c, inhop, m_prevInhop, m_prevOuthop);
|
||||
}
|
||||
|
||||
if (m_parameters.options & RubberBandStretcher::OptionFormantPreserved) {
|
||||
analyseFormant();
|
||||
}
|
||||
|
||||
// Phase update. This is synchronised across all channels
|
||||
|
||||
for (auto &it : m_channelData[0]->scales) {
|
||||
@@ -538,8 +566,9 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
||||
|
||||
cd->haveReadahead = true;
|
||||
|
||||
// For the others (and the classify if the inhop has changed) we
|
||||
// operate directly in the scale data and restrict the range for
|
||||
// For the others (and the classify as well, if the inhop has
|
||||
// changed or we haven't filled the readahead yet) we operate
|
||||
// directly in the scale data and restrict the range for
|
||||
// cartesian-polar conversion
|
||||
|
||||
for (auto &it: cd->scales) {
|
||||
@@ -571,15 +600,26 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
||||
|
||||
// Use the classification scale to get a bin segmentation and
|
||||
// calculate the adaptive frequency guide for this channel
|
||||
|
||||
v_copy(cd->classification.data(), cd->nextClassification.data(),
|
||||
cd->classification.size());
|
||||
cd->classifier->classify(readahead.mag.data(),
|
||||
cd->nextClassification.data());
|
||||
|
||||
cd->prevSegmentation = cd->segmentation;
|
||||
cd->segmentation = cd->nextSegmentation;
|
||||
cd->nextSegmentation = cd->segmenter->segment(readahead.mag.data());
|
||||
cd->nextSegmentation = cd->segmenter->segment(cd->nextClassification.data());
|
||||
|
||||
m_troughPicker.findNearestAndNextPeaks
|
||||
(classifyScale->mag.data(), 3, nullptr,
|
||||
classifyScale->troughs.data());
|
||||
|
||||
double instantaneousRatio = double(prevOuthop) / double(prevInhop);
|
||||
//!!!??? bool specialCaseUnity = !(m_parameters.options &
|
||||
// RubberBandStretcher::OptionPitchHighConsistency);
|
||||
|
||||
bool specialCaseUnity = true;
|
||||
|
||||
m_guide.calculate(instantaneousRatio,
|
||||
classifyScale->mag.data(),
|
||||
classifyScale->troughs.data(),
|
||||
@@ -587,9 +627,70 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
||||
cd->segmentation,
|
||||
cd->prevSegmentation,
|
||||
cd->nextSegmentation,
|
||||
specialCaseUnity,
|
||||
cd->guidance);
|
||||
}
|
||||
|
||||
void
|
||||
R3StretcherImpl::analyseFormant()
|
||||
{
|
||||
int classify = m_guideConfiguration.classificationFftSize;
|
||||
int binCount = classify/2 + 1;
|
||||
int channels = m_parameters.channels;
|
||||
|
||||
auto &f = *m_formant;
|
||||
|
||||
v_zero(f.envelope.data(), binCount);
|
||||
|
||||
for (int c = 0; c < channels; ++c) {
|
||||
auto &cd = m_channelData.at(c);
|
||||
auto &scale = cd->scales.at(classify);
|
||||
for (int i = 0; i < binCount; ++i) {
|
||||
f.envelope.at(i) += scale->mag.at(i);
|
||||
}
|
||||
}
|
||||
|
||||
m_scaleData.at(classify)->fft.inverseCepstral
|
||||
(f.envelope.data(), f.cepstra.data());
|
||||
|
||||
int cutoff = int(floor(m_parameters.sampleRate / 700.0));
|
||||
if (cutoff < 1) cutoff = 1;
|
||||
|
||||
f.cepstra[0] /= 2.0;
|
||||
f.cepstra[cutoff-1] /= 2.0;
|
||||
for (int i = cutoff; i < classify; ++i) {
|
||||
f.cepstra[i] = 0.0;
|
||||
}
|
||||
v_scale(f.cepstra.data(), 1.0 / double(classify), cutoff);
|
||||
|
||||
m_scaleData.at(classify)->fft.forward
|
||||
(f.cepstra.data(), f.envelope.data(),
|
||||
f.shifted.data()); // shifted is just a spare for this one
|
||||
|
||||
v_exp(f.envelope.data(), binCount);
|
||||
|
||||
for (int i = 0; i < binCount; ++i) {
|
||||
if (f.envelope[i] > 1.0e10) f.envelope[i] = 1.0e10;
|
||||
}
|
||||
|
||||
double scale = m_pitchScale;
|
||||
for (int target = 0; target < binCount; ++target) {
|
||||
int source = int(round(target * scale));
|
||||
if (source >= binCount) {
|
||||
f.shifted[target] = 0.0;
|
||||
} else {
|
||||
f.shifted[target] = f.envelope[source];
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "X:";
|
||||
for (int i = 0; i < binCount; ++i) {
|
||||
if (i > 0) std::cout << ",";
|
||||
std::cout << f.shifted[i];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
void
|
||||
R3StretcherImpl::synthesiseChannel(int c, int outhop)
|
||||
{
|
||||
@@ -598,12 +699,22 @@ R3StretcherImpl::synthesiseChannel(int c, int outhop)
|
||||
auto &cd = m_channelData.at(c);
|
||||
|
||||
for (auto &it : cd->scales) {
|
||||
|
||||
auto &scale = it.second;
|
||||
int bufSize = scale->bufSize;
|
||||
|
||||
// copy to prevMag before filtering
|
||||
v_copy(scale->prevMag.data(),
|
||||
scale->mag.data(),
|
||||
bufSize);
|
||||
|
||||
// formant shift only the middle register
|
||||
if (m_parameters.options & RubberBandStretcher::OptionFormantPreserved) {
|
||||
if (it.first == m_guideConfiguration.classificationFftSize) {
|
||||
v_divide(scale->mag.data(), m_formant->envelope.data(), bufSize);
|
||||
v_multiply(scale->mag.data(), m_formant->shifted.data(), bufSize);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &band : cd->guidance.fftBands) {
|
||||
|
||||
@@ -36,6 +36,8 @@
|
||||
#include "../common/Allocators.h"
|
||||
#include "../common/Window.h"
|
||||
|
||||
#include "../rubberband/RubberBandStretcher.h"
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
@@ -49,8 +51,10 @@ public:
|
||||
struct Parameters {
|
||||
double sampleRate;
|
||||
int channels;
|
||||
RubberBandStretcher::Options options;
|
||||
std::function<void(const std::string &)> logger;
|
||||
Parameters(double _sampleRate, int _channels,
|
||||
RubberBandStretcher::Options options,
|
||||
std::function<void(const std::string &)> _log = &logCout) :
|
||||
sampleRate(_sampleRate), channels(_channels), logger(_log) { }
|
||||
};
|
||||
@@ -68,6 +72,9 @@ public:
|
||||
double getTimeRatio() const;
|
||||
double getPitchScale() const;
|
||||
|
||||
void setFormantOption(RubberBandStretcher::Options);
|
||||
void setPitchOption(RubberBandStretcher::Options);
|
||||
|
||||
size_t getSamplesRequired() const;
|
||||
void process(const float *const *input, size_t samples, bool final);
|
||||
int available() const;
|
||||
@@ -133,6 +140,9 @@ protected:
|
||||
std::map<int, std::shared_ptr<ChannelScaleData>> scales;
|
||||
ClassificationReadaheadData readahead;
|
||||
bool haveReadahead;
|
||||
std::unique_ptr<BinClassifier> classifier;
|
||||
FixedVector<BinClassifier::Classification> classification;
|
||||
FixedVector<BinClassifier::Classification> nextClassification;
|
||||
std::unique_ptr<BinSegmenter> segmenter;
|
||||
BinSegmenter::Segmentation segmentation;
|
||||
BinSegmenter::Segmentation prevSegmentation;
|
||||
@@ -150,8 +160,12 @@ protected:
|
||||
scales(),
|
||||
readahead(segmenterParameters.fftSize),
|
||||
haveReadahead(false),
|
||||
segmenter(new BinSegmenter(segmenterParameters,
|
||||
classifierParameters)),
|
||||
classifier(new BinClassifier(classifierParameters)),
|
||||
classification(classifierParameters.binCount,
|
||||
BinClassifier::Classification::Silent),
|
||||
nextClassification(classifierParameters.binCount,
|
||||
BinClassifier::Classification::Silent),
|
||||
segmenter(new BinSegmenter(segmenterParameters)),
|
||||
segmentation(), prevSegmentation(), nextSegmentation(),
|
||||
mixdown(longestFftSize, 0.f), // though it could be shorter
|
||||
resampled(outRingBufferSize, 0.f),
|
||||
@@ -205,6 +219,17 @@ protected:
|
||||
WindowType synthesisWindowShape(int fftSize);
|
||||
int synthesisWindowLength(int fftSize);
|
||||
};
|
||||
|
||||
struct FormantData {
|
||||
FixedVector<double> cepstra;
|
||||
FixedVector<double> envelope;
|
||||
FixedVector<double> shifted;
|
||||
|
||||
FormantData(int _fftSize) :
|
||||
cepstra(_fftSize, 0.0),
|
||||
envelope(_fftSize, 0.0),
|
||||
shifted(_fftSize, 0.0) { }
|
||||
};
|
||||
|
||||
Parameters m_parameters;
|
||||
|
||||
@@ -219,6 +244,7 @@ protected:
|
||||
Peak<double, std::less<double>> m_troughPicker;
|
||||
std::unique_ptr<StretchCalculator> m_calculator;
|
||||
std::unique_ptr<Resampler> m_resampler;
|
||||
std::unique_ptr<FormantData> m_formant;
|
||||
std::atomic<int> m_inhop;
|
||||
int m_prevInhop;
|
||||
int m_prevOuthop;
|
||||
@@ -227,6 +253,7 @@ protected:
|
||||
void consume();
|
||||
void calculateHop();
|
||||
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);
|
||||
void analyseFormant();
|
||||
void synthesiseChannel(int channel, int outhop);
|
||||
|
||||
double getEffectiveRatio() const {
|
||||
|
||||
Reference in New Issue
Block a user