First cut at formant preservation

This commit is contained in:
Chris Cannam
2022-05-27 14:58:42 +01:00
parent 4d4bc7b4c3
commit f13d96a474
5 changed files with 156 additions and 23 deletions

View File

@@ -41,7 +41,8 @@ RubberBandStretcher::RubberBandStretcher(size_t sampleRate,
: nullptr),
m_r3d
((options & OptionEngineFiner) ?
new R3StretcherImpl(R3StretcherImpl::Parameters(sampleRate, channels),
new R3StretcherImpl(R3StretcherImpl::Parameters
(sampleRate, channels, options),
initialTimeRatio, initialPitchScale)
: nullptr)
{

View File

@@ -50,23 +50,18 @@ public:
fftSize(_fftSize), sampleRate(_sampleRate) { }
};
BinSegmenter(Parameters parameters,
BinClassifier::Parameters classifierParameters) :
BinSegmenter(Parameters parameters) :
m_parameters(parameters),
m_classifierParameters(classifierParameters),
m_classifier(classifierParameters),
m_classification(classifierParameters.binCount,
BinClassifier::Classification::Silent),
m_numeric(classifierParameters.binCount, 0),
m_classFilter(classifierParameters.binCount / 64)
m_binCount(m_parameters.fftSize/2 + 1),
m_numeric(m_binCount, 0),
m_classFilter(m_binCount / 64)
{
}
Segmentation segment(const double *const mag) {
int n = m_classifierParameters.binCount;
m_classifier.classify(mag, m_classification.data());
Segmentation segment(const BinClassifier::Classification *classification) {
int n = m_binCount;
for (int i = 0; i < n; ++i) {
switch (m_classification[i]) {
switch (classification[i]) {
case BinClassifier::Classification::Harmonic:
m_numeric[i] = 0; break;
case BinClassifier::Classification::Percussive:
@@ -108,9 +103,7 @@ public:
protected:
Parameters m_parameters;
BinClassifier::Parameters m_classifierParameters;
BinClassifier m_classifier;
std::vector<BinClassifier::Classification> m_classification;
int m_binCount;
std::vector<int> m_numeric;
MovingMedian<int> m_classFilter;

View File

@@ -144,6 +144,7 @@ public:
const BinSegmenter::Segmentation &segmentation,
const BinSegmenter::Segmentation &prevSegmentation,
const BinSegmenter::Segmentation &nextSegmentation,
bool specialCaseUnity,
Guidance &guidance) const {
guidance.kick.present = false;
@@ -157,7 +158,7 @@ public:
guidance.fftBands[1].fftSize = roundUp(int(ceil(nyquist/16.0)));
guidance.fftBands[2].fftSize = roundUp(int(ceil(nyquist/32.0)));
if (fabs(ratio - 1.0) < 1.0e-6) {
if (specialCaseUnity && (fabs(ratio - 1.0) < 1.0e-6)) {
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = 0.0;
guidance.fftBands[1].f0 = 0.0;

View File

@@ -88,7 +88,10 @@ R3StretcherImpl::R3StretcherImpl(Parameters parameters,
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; //!!!???
m_resampler = std::unique_ptr<Resampler>
(new Resampler(resamplerParameters, m_parameters.channels));
m_formant = std::unique_ptr<FormantData>
(new FormantData(m_guideConfiguration.classificationFftSize));
calculateHop();
m_prevInhop = m_inhop;
@@ -146,6 +149,27 @@ R3StretcherImpl::setPitchScale(double scale)
calculateHop();
}
/**
 * Replace the formant-handling bits of the stored option set with the
 * corresponding bits taken from \a options. Only the
 * OptionFormantShifted / OptionFormantPreserved bits are affected;
 * every other stored option bit is left as it was, and any
 * non-formant bits in \a options are ignored.
 */
void
R3StretcherImpl::setFormantOption(RubberBandStretcher::Options options)
{
    const int formantBits = (RubberBandStretcher::OptionFormantShifted |
                             RubberBandStretcher::OptionFormantPreserved);

    // Clear the old formant selection, then merge in the requested one
    m_parameters.options =
        (m_parameters.options & ~formantBits) | (options & formantBits);
}
/**
 * Replace the pitch-mode bits of the stored option set with the
 * corresponding bits taken from \a options. Only the
 * OptionPitchHighQuality / OptionPitchHighSpeed /
 * OptionPitchHighConsistency bits are affected; every other stored
 * option bit is left as it was, and any non-pitch bits in \a options
 * are ignored.
 */
void
R3StretcherImpl::setPitchOption(RubberBandStretcher::Options options)
{
    const int pitchBits = (RubberBandStretcher::OptionPitchHighQuality |
                           RubberBandStretcher::OptionPitchHighSpeed |
                           RubberBandStretcher::OptionPitchHighConsistency);

    // Clear the old pitch selection, then merge in the requested one
    m_parameters.options =
        (m_parameters.options & ~pitchBits) | (options & pitchBits);
}
void
R3StretcherImpl::calculateHop()
{
@@ -353,6 +377,10 @@ R3StretcherImpl::consume()
analyseChannel(c, inhop, m_prevInhop, m_prevOuthop);
}
if (m_parameters.options & RubberBandStretcher::OptionFormantPreserved) {
analyseFormant();
}
// Phase update. This is synchronised across all channels
for (auto &it : m_channelData[0]->scales) {
@@ -538,8 +566,9 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
cd->haveReadahead = true;
// For the others (and the classify if the inhop has changed) we
// operate directly in the scale data and restrict the range for
// For the others (and the classify as well, if the inhop has
// changed or we haven't filled the readahead yet) we operate
// directly in the scale data and restrict the range for
// cartesian-polar conversion
for (auto &it: cd->scales) {
@@ -571,15 +600,26 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
// Use the classification scale to get a bin segmentation and
// calculate the adaptive frequency guide for this channel
v_copy(cd->classification.data(), cd->nextClassification.data(),
cd->classification.size());
cd->classifier->classify(readahead.mag.data(),
cd->nextClassification.data());
cd->prevSegmentation = cd->segmentation;
cd->segmentation = cd->nextSegmentation;
cd->nextSegmentation = cd->segmenter->segment(readahead.mag.data());
cd->nextSegmentation = cd->segmenter->segment(cd->nextClassification.data());
m_troughPicker.findNearestAndNextPeaks
(classifyScale->mag.data(), 3, nullptr,
classifyScale->troughs.data());
double instantaneousRatio = double(prevOuthop) / double(prevInhop);
//!!!??? bool specialCaseUnity = !(m_parameters.options &
// RubberBandStretcher::OptionPitchHighConsistency);
bool specialCaseUnity = true;
m_guide.calculate(instantaneousRatio,
classifyScale->mag.data(),
classifyScale->troughs.data(),
@@ -587,9 +627,70 @@ R3StretcherImpl::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
cd->segmentation,
cd->prevSegmentation,
cd->nextSegmentation,
specialCaseUnity,
cd->guidance);
}
/**
 * Compute the smoothed spectral envelope of the current frame, and a
 * copy of it resampled by the current pitch scale, into m_formant.
 *
 * The magnitudes of the classification-size FFT scale are summed
 * across all channels, passed through fft.inverseCepstral, truncated
 * (liftered) to the first sampleRate/700 coefficients, transformed
 * back with the forward FFT and exponentiated. The result is stored
 * in m_formant->envelope; m_formant->shifted receives the same
 * envelope read at bin positions scaled by m_pitchScale, with zero
 * wherever the source bin would lie beyond the top of the spectrum.
 *
 * Called from consume() once per processed frame when
 * OptionFormantPreserved is set, so it must not perform any console
 * output or other blocking I/O.
 */
void
R3StretcherImpl::analyseFormant()
{
    int classify = m_guideConfiguration.classificationFftSize;
    int binCount = classify/2 + 1;
    int channels = m_parameters.channels;

    auto &f = *m_formant;

    // Accumulate the magnitude spectra of all channels into a single
    // envelope estimate shared between them
    v_zero(f.envelope.data(), binCount);

    for (int c = 0; c < channels; ++c) {
        auto &cd = m_channelData.at(c);
        auto &scale = cd->scales.at(classify);
        for (int i = 0; i < binCount; ++i) {
            f.envelope.at(i) += scale->mag.at(i);
        }
    }

    m_scaleData.at(classify)->fft.inverseCepstral
        (f.envelope.data(), f.cepstra.data());

    // Number of low-order cepstral coefficients to retain. Bounded
    // below so that cutoff-1 is a valid index, and above so that we
    // never index or scale beyond the classify-sized cepstra buffer
    // (possible with a very high sample rate or a very short FFT)
    int cutoff = int(floor(m_parameters.sampleRate / 700.0));
    if (cutoff < 1) cutoff = 1;
    if (cutoff > classify) cutoff = classify;

    // Halve the first and last retained coefficients, and discard
    // everything from the cutoff upward
    f.cepstra[0] /= 2.0;
    f.cepstra[cutoff-1] /= 2.0;
    for (int i = cutoff; i < classify; ++i) {
        f.cepstra[i] = 0.0;
    }
    v_scale(f.cepstra.data(), 1.0 / double(classify), cutoff);

    m_scaleData.at(classify)->fft.forward
        (f.cepstra.data(), f.envelope.data(),
         f.shifted.data()); // shifted is just a spare for this one

    v_exp(f.envelope.data(), binCount);

    // Cap the envelope so that an extreme value cannot propagate
    // into the later divide/multiply of the magnitudes
    for (int i = 0; i < binCount; ++i) {
        if (f.envelope[i] > 1.0e10) f.envelope[i] = 1.0e10;
    }

    // Build the shifted envelope: each target bin takes the envelope
    // value from the bin at pitch-scale times its own index
    double scale = m_pitchScale;
    for (int target = 0; target < binCount; ++target) {
        int source = int(round(target * scale));
        if (source >= binCount) {
            f.shifted[target] = 0.0;
        } else {
            f.shifted[target] = f.envelope[source];
        }
    }
}
void
R3StretcherImpl::synthesiseChannel(int c, int outhop)
{
@@ -598,12 +699,22 @@ R3StretcherImpl::synthesiseChannel(int c, int outhop)
auto &cd = m_channelData.at(c);
for (auto &it : cd->scales) {
auto &scale = it.second;
int bufSize = scale->bufSize;
// copy to prevMag before filtering
v_copy(scale->prevMag.data(),
scale->mag.data(),
bufSize);
// formant shift only the middle register
if (m_parameters.options & RubberBandStretcher::OptionFormantPreserved) {
if (it.first == m_guideConfiguration.classificationFftSize) {
v_divide(scale->mag.data(), m_formant->envelope.data(), bufSize);
v_multiply(scale->mag.data(), m_formant->shifted.data(), bufSize);
}
}
}
for (const auto &band : cd->guidance.fftBands) {

View File

@@ -36,6 +36,8 @@
#include "../common/Allocators.h"
#include "../common/Window.h"
#include "../rubberband/RubberBandStretcher.h"
#include <map>
#include <memory>
#include <functional>
@@ -49,8 +51,10 @@ public:
struct Parameters {
double sampleRate;
int channels;
RubberBandStretcher::Options options;
std::function<void(const std::string &)> logger;
Parameters(double _sampleRate, int _channels,
RubberBandStretcher::Options options,
std::function<void(const std::string &)> _log = &logCout) :
sampleRate(_sampleRate), channels(_channels), logger(_log) { }
};
@@ -68,6 +72,9 @@ public:
double getTimeRatio() const;
double getPitchScale() const;
void setFormantOption(RubberBandStretcher::Options);
void setPitchOption(RubberBandStretcher::Options);
size_t getSamplesRequired() const;
void process(const float *const *input, size_t samples, bool final);
int available() const;
@@ -133,6 +140,9 @@ protected:
std::map<int, std::shared_ptr<ChannelScaleData>> scales;
ClassificationReadaheadData readahead;
bool haveReadahead;
std::unique_ptr<BinClassifier> classifier;
FixedVector<BinClassifier::Classification> classification;
FixedVector<BinClassifier::Classification> nextClassification;
std::unique_ptr<BinSegmenter> segmenter;
BinSegmenter::Segmentation segmentation;
BinSegmenter::Segmentation prevSegmentation;
@@ -150,8 +160,12 @@ protected:
scales(),
readahead(segmenterParameters.fftSize),
haveReadahead(false),
segmenter(new BinSegmenter(segmenterParameters,
classifierParameters)),
classifier(new BinClassifier(classifierParameters)),
classification(classifierParameters.binCount,
BinClassifier::Classification::Silent),
nextClassification(classifierParameters.binCount,
BinClassifier::Classification::Silent),
segmenter(new BinSegmenter(segmenterParameters)),
segmentation(), prevSegmentation(), nextSegmentation(),
mixdown(longestFftSize, 0.f), // though it could be shorter
resampled(outRingBufferSize, 0.f),
@@ -205,6 +219,17 @@ protected:
WindowType synthesisWindowShape(int fftSize);
int synthesisWindowLength(int fftSize);
};
/**
 * Working buffers for formant (spectral envelope) analysis. All three
 * buffers are allocated with _fftSize elements and zero-filled.
 */
struct FormantData {
    /// Cepstral coefficients of the summed magnitude spectrum
    FixedVector<double> cepstra;
    /// Smoothed spectral envelope of the current frame
    FixedVector<double> envelope;
    /// Envelope resampled by the pitch scale; also used as scratch
    /// space while the envelope is being computed
    FixedVector<double> shifted;

    /**
     * @param _fftSize Number of elements to allocate in each buffer
     *
     * Marked explicit so that a bare int can never be silently
     * converted into a set of freshly allocated buffers.
     */
    explicit FormantData(int _fftSize) :
        cepstra(_fftSize, 0.0),
        envelope(_fftSize, 0.0),
        shifted(_fftSize, 0.0) { }
};
Parameters m_parameters;
@@ -219,6 +244,7 @@ protected:
Peak<double, std::less<double>> m_troughPicker;
std::unique_ptr<StretchCalculator> m_calculator;
std::unique_ptr<Resampler> m_resampler;
std::unique_ptr<FormantData> m_formant;
std::atomic<int> m_inhop;
int m_prevInhop;
int m_prevOuthop;
@@ -227,6 +253,7 @@ protected:
void consume();
void calculateHop();
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);
void analyseFormant();
void synthesiseChannel(int channel, int outhop);
double getEffectiveRatio() const {