In offline mode, create the resampler only if needed (i.e. if the pitch ratio is still 1.0 at the point when process is first called); and use plain 2048-sample fft with unity stretch

This commit is contained in:
Chris Cannam
2022-06-24 10:51:40 +01:00
parent 3b168ca55e
commit d65755427f
3 changed files with 84 additions and 42 deletions

View File

@@ -127,7 +127,8 @@ public:
// This is the classification and fallback FFT: we need it to
// go up to Nyquist so we can seamlessly switch to it for
// longer stretches
// longer stretches, and down to 0.0 so we can use it for
// unity in offline mode
bandFftSize = roundUp(int(ceil(rate/32.0)));
m_configuration.fftBandLimits[1] =
BandLimits(bandFftSize, rate, 0.0, rate / 2.0);
@@ -154,6 +155,7 @@ public:
const BinSegmenter::Segmentation &nextSegmentation,
double meanMagnitude,
int unityCount,
bool realtime,
Guidance &guidance) const {
bool hadPhaseReset = guidance.phaseReset.present;
@@ -182,7 +184,8 @@ public:
hadPhaseReset,
unityCount,
magnitudes,
segmentation);
segmentation,
realtime);
return;
}
@@ -380,12 +383,28 @@ protected:
bool hadPhaseReset,
uint32_t unityCount,
const double *const magnitudes,
const BinSegmenter::Segmentation &segmentation) const {
const BinSegmenter::Segmentation &segmentation,
bool realtime) const {
// std::cout << "unity" << std::endl;
double nyquist = m_parameters.sampleRate / 2.0;
if (!realtime) {
// ratio can't change, so we are just running 1.0 ratio
// throughout
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = 0.0;
guidance.fftBands[1].f0 = 0.0;
guidance.fftBands[1].f1 = nyquist;
guidance.fftBands[2].f0 = nyquist;
guidance.fftBands[2].f1 = nyquist;
guidance.phaseReset.present = true;
guidance.phaseReset.f0 = 0.0;
guidance.phaseReset.f1 = nyquist;
return;
}
guidance.fftBands[0].f0 = 0.0;
guidance.fftBands[0].f1 = m_minLower;
guidance.fftBands[1].f0 = m_minLower;

View File

@@ -104,22 +104,13 @@ R3Stretcher::R3Stretcher(Parameters parameters,
1, false, // no fixed inputIncrement
m_log));
Resampler::Parameters resamplerParameters;
resamplerParameters.quality = Resampler::FastestTolerable;
if (isRealTime()) {
resamplerParameters.dynamism = Resampler::RatioOftenChanging;
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
} else {
// ratio can't be changed in offline mode
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
createResampler();
// In offline mode we don't create the resampler yet - we
// don't want to have one at all if the pitch ratio is 1.0,
// but that could change before the first process call, so we
// create the resampler if needed then
}
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize; //!!!???
m_resampler = std::unique_ptr<Resampler>
(new Resampler(resamplerParameters, m_parameters.channels));
calculateHop();
@@ -132,23 +123,6 @@ R3Stretcher::R3Stretcher(Parameters parameters,
if (!m_timeRatio.is_lock_free()) {
m_log.log(0, "WARNING: std::atomic<double> is not lock-free");
}
// Pad to half of the longest frame. As with R2, in real-time mode
// we don't do this -- it's better to start with a swoosh than
// introduce more latency, and we don't want gaps when the ratio
// changes.
if (!isRealTime()) {
int pad = m_guideConfiguration.longestFftSize / 2;
m_log.log(1, "offline mode: prefilling with", pad);
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->zero(pad);
}
// By the time we skip this later we will have resampled
m_startSkip = int(round(pad / m_pitchScale));
} else {
m_log.log(1, "realtime mode: no prefill");
}
}
WindowType
@@ -249,6 +223,27 @@ R3Stretcher::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
m_keyFrameMap = mapping;
}
void
R3Stretcher::createResampler()
{
Resampler::Parameters resamplerParameters;
resamplerParameters.quality = Resampler::FastestTolerable;
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize;
if (isRealTime()) {
resamplerParameters.dynamism = Resampler::RatioOftenChanging;
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
} else {
// ratio can't be changed in offline mode
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
}
m_resampler = std::unique_ptr<Resampler>
(new Resampler(resamplerParameters, m_parameters.channels));
}
void
R3Stretcher::calculateHop()
{
@@ -398,7 +393,9 @@ void
R3Stretcher::reset()
{
m_calculator->reset();
m_resampler->reset();
if (m_resampler) {
m_resampler->reset();
}
for (auto &it : m_scaleData) {
it.second->guided.reset();
@@ -463,12 +460,36 @@ R3Stretcher::process(const float *const *input, size_t samples, bool final)
return;
}
if (!isRealTime() && !m_keyFrameMap.empty()) {
if (m_mode == ProcessMode::Studying) {
m_totalTargetDuration =
size_t(round(m_studyInputDuration * getEffectiveRatio()));
if (!isRealTime()) {
if (m_mode == ProcessMode::JustCreated ||
m_mode == ProcessMode::Studying) {
if (m_pitchScale != 1.0 && !m_resampler) {
createResampler();
}
// Pad to half of the longest frame. As with R2, in
// real-time mode we don't do this -- it's better to start
// with a swoosh than introduce more latency, and we don't
// want gaps when the ratio changes.
int pad = m_guideConfiguration.longestFftSize / 2;
m_log.log(1, "offline mode: prefilling with", pad);
for (int c = 0; c < m_parameters.channels; ++c) {
m_channelData[c]->inbuf->zero(pad);
}
// By the time we skip this later we will have resampled
m_startSkip = int(round(pad / m_pitchScale));
}
if (!m_keyFrameMap.empty()) {
if (m_mode == ProcessMode::Studying) {
m_totalTargetDuration =
size_t(round(m_studyInputDuration * getEffectiveRatio()));
}
updateRatioFromMap();
}
updateRatioFromMap();
}
if (final) {
@@ -891,7 +912,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
double ratio = getEffectiveRatio();
if (fabs(ratio - 1.0) < 1.0e-6) {
if (fabs(ratio - 1.0) < 1.0e-7) {
++m_unityCount;
} else {
m_unityCount = 0;
@@ -907,6 +928,7 @@ R3Stretcher::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
cd->nextSegmentation,
v_mean(classifyScale->mag.data() + 1, classify/2),
m_unityCount,
isRealTime(),
cd->guidance);
/*
if (c == 0) {
@@ -1070,7 +1092,7 @@ R3Stretcher::synthesiseChannel(int c, int outhop)
scaleData->fft.inverse(scale->real.data(),
scale->imag.data(),
scale->timeDomain.data());
v_fftshift(scale->timeDomain.data(), fftSize);
// Synthesis window may be shorter than analysis window, so

View File

@@ -310,6 +310,7 @@ protected:
ProcessMode m_mode;
void consume();
void createResampler();
void calculateHop();
void updateRatioFromMap();
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);