In offline mode, create the resampler only if needed (i.e. only if the pitch ratio is no longer 1.0 at the point when process is first called); and use a plain 2048-sample FFT with unity stretch
This commit is contained in:
@@ -127,7 +127,8 @@ public:
|
|||||||
|
|
||||||
// This is the classification and fallback FFT: we need it to
|
// This is the classification and fallback FFT: we need it to
|
||||||
// go up to Nyquist so we can seamlessly switch to it for
|
// go up to Nyquist so we can seamlessly switch to it for
|
||||||
// longer stretches
|
// longer stretches, and down to 0.0 so we can use it for
|
||||||
|
// unity in offline mode
|
||||||
bandFftSize = roundUp(int(ceil(rate/32.0)));
|
bandFftSize = roundUp(int(ceil(rate/32.0)));
|
||||||
m_configuration.fftBandLimits[1] =
|
m_configuration.fftBandLimits[1] =
|
||||||
BandLimits(bandFftSize, rate, 0.0, rate / 2.0);
|
BandLimits(bandFftSize, rate, 0.0, rate / 2.0);
|
||||||
@@ -154,6 +155,7 @@ public:
|
|||||||
const BinSegmenter::Segmentation &nextSegmentation,
|
const BinSegmenter::Segmentation &nextSegmentation,
|
||||||
double meanMagnitude,
|
double meanMagnitude,
|
||||||
int unityCount,
|
int unityCount,
|
||||||
|
bool realtime,
|
||||||
Guidance &guidance) const {
|
Guidance &guidance) const {
|
||||||
|
|
||||||
bool hadPhaseReset = guidance.phaseReset.present;
|
bool hadPhaseReset = guidance.phaseReset.present;
|
||||||
@@ -182,7 +184,8 @@ public:
|
|||||||
hadPhaseReset,
|
hadPhaseReset,
|
||||||
unityCount,
|
unityCount,
|
||||||
magnitudes,
|
magnitudes,
|
||||||
segmentation);
|
segmentation,
|
||||||
|
realtime);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -380,12 +383,28 @@ protected:
|
|||||||
bool hadPhaseReset,
|
bool hadPhaseReset,
|
||||||
uint32_t unityCount,
|
uint32_t unityCount,
|
||||||
const double *const magnitudes,
|
const double *const magnitudes,
|
||||||
const BinSegmenter::Segmentation &segmentation) const {
|
const BinSegmenter::Segmentation &segmentation,
|
||||||
|
bool realtime) const {
|
||||||
|
|
||||||
// std::cout << "unity" << std::endl;
|
// std::cout << "unity" << std::endl;
|
||||||
|
|
||||||
double nyquist = m_parameters.sampleRate / 2.0;
|
double nyquist = m_parameters.sampleRate / 2.0;
|
||||||
|
|
||||||
|
if (!realtime) {
|
||||||
|
// ratio can't change, so we are just running 1.0 ratio
|
||||||
|
// throughout
|
||||||
|
guidance.fftBands[0].f0 = 0.0;
|
||||||
|
guidance.fftBands[0].f1 = 0.0;
|
||||||
|
guidance.fftBands[1].f0 = 0.0;
|
||||||
|
guidance.fftBands[1].f1 = nyquist;
|
||||||
|
guidance.fftBands[2].f0 = nyquist;
|
||||||
|
guidance.fftBands[2].f1 = nyquist;
|
||||||
|
guidance.phaseReset.present = true;
|
||||||
|
guidance.phaseReset.f0 = 0.0;
|
||||||
|
guidance.phaseReset.f1 = nyquist;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
guidance.fftBands[0].f0 = 0.0;
|
guidance.fftBands[0].f0 = 0.0;
|
||||||
guidance.fftBands[0].f1 = m_minLower;
|
guidance.fftBands[0].f1 = m_minLower;
|
||||||
guidance.fftBands[1].f0 = m_minLower;
|
guidance.fftBands[1].f0 = m_minLower;
|
||||||
|
|||||||
@@ -104,22 +104,13 @@ R3Stretcher::R3Stretcher(Parameters parameters,
|
|||||||
1, false, // no fixed inputIncrement
|
1, false, // no fixed inputIncrement
|
||||||
m_log));
|
m_log));
|
||||||
|
|
||||||
Resampler::Parameters resamplerParameters;
|
|
||||||
resamplerParameters.quality = Resampler::FastestTolerable;
|
|
||||||
|
|
||||||
if (isRealTime()) {
|
if (isRealTime()) {
|
||||||
resamplerParameters.dynamism = Resampler::RatioOftenChanging;
|
createResampler();
|
||||||
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
|
// In offline mode we don't create the resampler yet - we
|
||||||
} else {
|
// don't want to have one at all if the pitch ratio is 1.0,
|
||||||
// ratio can't be changed in offline mode
|
// but that could change before the first process call, so we
|
||||||
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
|
// create the resampler if needed then
|
||||||
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
|
|
||||||
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; //!!!???
|
|
||||||
m_resampler = std::unique_ptr<Resampler>
|
|
||||||
(new Resampler(resamplerParameters, m_parameters.channels));
|
|
||||||
|
|
||||||
calculateHop();
|
calculateHop();
|
||||||
|
|
||||||
@@ -132,23 +123,6 @@ R3Stretcher::R3Stretcher(Parameters parameters,
|
|||||||
if (!m_timeRatio.is_lock_free()) {
|
if (!m_timeRatio.is_lock_free()) {
|
||||||
m_log.log(0, "WARNING: std::atomic<double> is not lock-free");
|
m_log.log(0, "WARNING: std::atomic<double> is not lock-free");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pad to half of the longest frame. As with R2, in real-time mode
|
|
||||||
// we don't do this -- it's better to start with a swoosh than
|
|
||||||
// introduce more latency, and we don't want gaps when the ratio
|
|
||||||
// changes.
|
|
||||||
|
|
||||||
if (!isRealTime()) {
|
|
||||||
int pad = m_guideConfiguration.longestFftSize / 2;
|
|
||||||
m_log.log(1, "offline mode: prefilling with", pad);
|
|
||||||
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
||||||
m_channelData[c]->inbuf->zero(pad);
|
|
||||||
}
|
|
||||||
// By the time we skip this later we will have resampled
|
|
||||||
m_startSkip = int(round(pad / m_pitchScale));
|
|
||||||
} else {
|
|
||||||
m_log.log(1, "realtime mode: no prefill");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
WindowType
|
WindowType
|
||||||
@@ -249,6 +223,27 @@ R3Stretcher::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
|
|||||||
m_keyFrameMap = mapping;
|
m_keyFrameMap = mapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
R3Stretcher::createResampler()
|
||||||
|
{
|
||||||
|
Resampler::Parameters resamplerParameters;
|
||||||
|
resamplerParameters.quality = Resampler::FastestTolerable;
|
||||||
|
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
|
||||||
|
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize;
|
||||||
|
|
||||||
|
if (isRealTime()) {
|
||||||
|
resamplerParameters.dynamism = Resampler::RatioOftenChanging;
|
||||||
|
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
|
||||||
|
} else {
|
||||||
|
// ratio can't be changed in offline mode
|
||||||
|
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
|
||||||
|
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_resampler = std::unique_ptr<Resampler>
|
||||||
|
(new Resampler(resamplerParameters, m_parameters.channels));
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
R3Stretcher::calculateHop()
|
R3Stretcher::calculateHop()
|
||||||
{
|
{
|
||||||
@@ -398,7 +393,9 @@ void
|
|||||||
R3Stretcher::reset()
|
R3Stretcher::reset()
|
||||||
{
|
{
|
||||||
m_calculator->reset();
|
m_calculator->reset();
|
||||||
m_resampler->reset();
|
if (m_resampler) {
|
||||||
|
m_resampler->reset();
|
||||||
|
}
|
||||||
|
|
||||||
for (auto &it : m_scaleData) {
|
for (auto &it : m_scaleData) {
|
||||||
it.second->guided.reset();
|
it.second->guided.reset();
|
||||||
@@ -463,12 +460,36 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isRealTime() && !m_keyFrameMap.empty()) {
|
if (!isRealTime()) {
|
||||||
if (m_mode == ProcessMode::Studying) {
|
|
||||||
m_totalTargetDuration =
|
if (m_mode == ProcessMode::JustCreated ||
|
||||||
size_t(round(m_studyInputDuration * getEffectiveRatio()));
|
m_mode == ProcessMode::Studying) {
|
||||||
|
|
||||||
|
if (m_pitchScale != 1.0 && !m_resampler) {
|
||||||
|
createResampler();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pad to half of the longest frame. As with R2, in
|
||||||
|
// real-time mode we don't do this -- it's better to start
|
||||||
|
// with a swoosh than introduce more latency, and we don't
|
||||||
|
// want gaps when the ratio changes.
|
||||||
|
|
||||||
|
int pad = m_guideConfiguration.longestFftSize / 2;
|
||||||
|
m_log.log(1, "offline mode: prefilling with", pad);
|
||||||
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
||||||
|
m_channelData[c]->inbuf->zero(pad);
|
||||||
|
}
|
||||||
|
// By the time we skip this later we will have resampled
|
||||||
|
m_startSkip = int(round(pad / m_pitchScale));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m_keyFrameMap.empty()) {
|
||||||
|
if (m_mode == ProcessMode::Studying) {
|
||||||
|
m_totalTargetDuration =
|
||||||
|
size_t(round(m_studyInputDuration * getEffectiveRatio()));
|
||||||
|
}
|
||||||
|
updateRatioFromMap();
|
||||||
}
|
}
|
||||||
updateRatioFromMap();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (final) {
|
if (final) {
|
||||||
@@ -891,7 +912,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
|
|
||||||
double ratio = getEffectiveRatio();
|
double ratio = getEffectiveRatio();
|
||||||
|
|
||||||
if (fabs(ratio - 1.0) < 1.0e-6) {
|
if (fabs(ratio - 1.0) < 1.0e-7) {
|
||||||
++m_unityCount;
|
++m_unityCount;
|
||||||
} else {
|
} else {
|
||||||
m_unityCount = 0;
|
m_unityCount = 0;
|
||||||
@@ -907,6 +928,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|||||||
cd->nextSegmentation,
|
cd->nextSegmentation,
|
||||||
v_mean(classifyScale->mag.data() + 1, classify/2),
|
v_mean(classifyScale->mag.data() + 1, classify/2),
|
||||||
m_unityCount,
|
m_unityCount,
|
||||||
|
isRealTime(),
|
||||||
cd->guidance);
|
cd->guidance);
|
||||||
/*
|
/*
|
||||||
if (c == 0) {
|
if (c == 0) {
|
||||||
@@ -1070,7 +1092,7 @@ R3Stretcher::synthesiseChannel(int c, int outhop)
|
|||||||
scaleData->fft.inverse(scale->real.data(),
|
scaleData->fft.inverse(scale->real.data(),
|
||||||
scale->imag.data(),
|
scale->imag.data(),
|
||||||
scale->timeDomain.data());
|
scale->timeDomain.data());
|
||||||
|
|
||||||
v_fftshift(scale->timeDomain.data(), fftSize);
|
v_fftshift(scale->timeDomain.data(), fftSize);
|
||||||
|
|
||||||
// Synthesis window may be shorter than analysis window, so
|
// Synthesis window may be shorter than analysis window, so
|
||||||
|
|||||||
@@ -310,6 +310,7 @@ protected:
|
|||||||
ProcessMode m_mode;
|
ProcessMode m_mode;
|
||||||
|
|
||||||
void consume();
|
void consume();
|
||||||
|
void createResampler();
|
||||||
void calculateHop();
|
void calculateHop();
|
||||||
void updateRatioFromMap();
|
void updateRatioFromMap();
|
||||||
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);
|
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);
|
||||||
|
|||||||
Reference in New Issue
Block a user