1203 lines
41 KiB
C++
1203 lines
41 KiB
C++
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
|
|
|
/*
|
|
Rubber Band Library
|
|
An audio time-stretching and pitch-shifting library.
|
|
Copyright 2007-2024 Particular Programs Ltd.
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License as
|
|
published by the Free Software Foundation; either version 2 of the
|
|
License, or (at your option) any later version. See the file
|
|
COPYING included with this distribution for more information.
|
|
|
|
Alternatively, if you have a valid commercial licence for the
|
|
Rubber Band Library obtained by agreement with the copyright
|
|
holders, you may redistribute and/or modify it under the terms
|
|
described in that licence.
|
|
|
|
If you wish to distribute code using the Rubber Band Library
|
|
under terms other than those of the GNU General Public License,
|
|
you must obtain a valid commercial licence before doing so.
|
|
*/
|
|
|
|
#include "R3LiveShifter.h"
|
|
|
|
#include "../common/VectorOpsComplex.h"
|
|
#include "../common/Profiler.h"
|
|
|
|
#include <array>
|
|
|
|
namespace RubberBand {
|
|
|
|
// Construct a live shifter. The supplied parameters are passed through
// validateSampleRate() before being stored; limits and the guide
// configuration are then derived from them. The shifter starts at a
// pitch scale of 1.0 and a formant scale of 0.0 (which adjustFormant()
// treats as "use 1 / pitch scale"). The resampler delays start at a
// placeholder of 32 each and are overwritten with measured values when
// initialise() creates the resamplers.
R3LiveShifter::R3LiveShifter(Parameters parameters, Log log) :
    m_log(log),
    m_parameters(validateSampleRate(parameters)),
    m_limits(parameters.options, m_parameters.sampleRate),
    m_pitchScale(1.0),
    m_formantScale(0.0),
    m_guide(Guide::Parameters(m_parameters.sampleRate, true), m_log),
    m_guideConfiguration(m_guide.getConfiguration()),
    m_channelAssembly(m_parameters.channels),
    m_initialResamplerDelays(32, 32),
    m_useReadahead(false),
    m_prevInhop(m_limits.maxInhopWithReadahead / 2),
    m_prevOuthop(m_prevInhop),
    m_firstProcess(true),
    m_unityCount(0)
{
    Profiler profiler("R3LiveShifter::R3LiveShifter");

    // All buffer, scale and resampler construction lives in initialise()
    initialise();
}
|
|
|
|
void
|
|
R3LiveShifter::initialise()
|
|
{
|
|
m_log.log(1, "R3LiveShifter::R3LiveShifter: rate, options",
|
|
m_parameters.sampleRate, m_parameters.options);
|
|
|
|
if (!isSingleWindowed()) {
|
|
m_log.log(1, "R3LiveShifter::R3LiveShifter: multi window enabled");
|
|
}
|
|
|
|
if (m_parameters.options & RubberBandLiveShifter::OptionWindowMedium) {
|
|
m_log.log(1, "R3LiveShifter::R3LiveShifter: readahead enabled");
|
|
m_useReadahead = true;
|
|
}
|
|
|
|
double maxClassifierFrequency = 16000.0;
|
|
if (maxClassifierFrequency > m_parameters.sampleRate/2) {
|
|
maxClassifierFrequency = m_parameters.sampleRate/2;
|
|
}
|
|
int classificationBins =
|
|
int(floor(m_guideConfiguration.classificationFftSize *
|
|
maxClassifierFrequency / m_parameters.sampleRate));
|
|
|
|
BinSegmenter::Parameters segmenterParameters
|
|
(m_guideConfiguration.classificationFftSize,
|
|
classificationBins, m_parameters.sampleRate, 18);
|
|
|
|
BinClassifier::Parameters classifierParameters
|
|
(classificationBins, 9, 1, 10, 2.0, 2.0);
|
|
|
|
if (isSingleWindowed()) {
|
|
classifierParameters.horizontalFilterLength = 7;
|
|
}
|
|
|
|
int inRingBufferSize = getWindowSourceSize() * 4;
|
|
int outRingBufferSize = getWindowSourceSize() * 4;
|
|
|
|
m_channelData.clear();
|
|
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
m_channelData.push_back(std::make_shared<ChannelData>
|
|
(segmenterParameters,
|
|
classifierParameters,
|
|
m_guideConfiguration.longestFftSize,
|
|
getWindowSourceSize(),
|
|
inRingBufferSize,
|
|
outRingBufferSize));
|
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
|
int fftSize = band.fftSize;
|
|
m_channelData[c]->scales[fftSize] =
|
|
std::make_shared<ChannelScaleData>
|
|
(fftSize, m_guideConfiguration.longestFftSize);
|
|
}
|
|
m_channelData[c]->guidance.phaseReset.present = true;
|
|
m_channelData[c]->guidance.phaseReset.f0 = 0.0;
|
|
m_channelData[c]->guidance.phaseReset.f1 = m_parameters.sampleRate / 2.0;
|
|
}
|
|
|
|
m_scaleData.clear();
|
|
|
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
|
int fftSize = band.fftSize;
|
|
GuidedPhaseAdvance::Parameters guidedParameters
|
|
(fftSize, m_parameters.sampleRate, m_parameters.channels,
|
|
isSingleWindowed());
|
|
m_scaleData[fftSize] = std::make_shared<ScaleData>
|
|
(guidedParameters, m_log);
|
|
}
|
|
|
|
createResamplers();
|
|
|
|
if (!m_pitchScale.is_lock_free()) {
|
|
m_log.log(0, "R3LiveShifter: WARNING: std::atomic<double> is not lock-free");
|
|
}
|
|
}
|
|
|
|
// Window shape used for analysis at this scale. Single-window mode
// always uses a Hann window; otherwise FFT sizes from 1024 to 2048
// inclusive use the Niemitalo forward window and all other sizes use
// Hann.
WindowType
R3LiveShifter::ScaleData::analysisWindowShape()
{
    if (singleWindowMode) {
        return HannWindow;
    }

    const bool inNiemitaloRange = (fftSize >= 1024 && fftSize <= 2048);
    return inNiemitaloRange ? NiemitaloForwardWindow : HannWindow;
}
|
|
|
|
int
|
|
R3LiveShifter::ScaleData::analysisWindowLength()
|
|
{
|
|
return fftSize;
|
|
}
|
|
|
|
// Window shape used for synthesis at this scale. Single-window mode
// always uses a Hann window; otherwise FFT sizes from 1024 to 2048
// inclusive use the Niemitalo reverse window and all other sizes use
// Hann.
WindowType
R3LiveShifter::ScaleData::synthesisWindowShape()
{
    if (singleWindowMode) {
        return HannWindow;
    }

    const bool inNiemitaloRange = (fftSize >= 1024 && fftSize <= 2048);
    return inNiemitaloRange ? NiemitaloReverseWindow : HannWindow;
}
|
|
|
|
int
|
|
R3LiveShifter::ScaleData::synthesisWindowLength()
|
|
{
|
|
if (singleWindowMode) {
|
|
return fftSize;
|
|
} else {
|
|
if (fftSize > 2048) return fftSize/2;
|
|
else return fftSize;
|
|
}
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::setPitchScale(double scale)
|
|
{
|
|
m_log.log(2, "R3LiveShifter::setPitchScale", scale);
|
|
|
|
if (scale == m_pitchScale) return;
|
|
m_pitchScale = scale;
|
|
|
|
if (m_firstProcess) {
|
|
measureResamplerDelay();
|
|
}
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::setFormantScale(double scale)
|
|
{
|
|
m_log.log(2, "R3LiveShifter::setFormantScale", scale);
|
|
m_formantScale = scale;
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::setFormantOption(RubberBandLiveShifter::Options options)
|
|
{
|
|
int mask = (RubberBandLiveShifter::OptionFormantShifted |
|
|
RubberBandLiveShifter::OptionFormantPreserved);
|
|
m_parameters.options &= ~mask;
|
|
options &= mask;
|
|
m_parameters.options |= options;
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::createResamplers()
|
|
{
|
|
Profiler profiler("R3LiveShifter::createResamplers");
|
|
|
|
Resampler::Parameters resamplerParameters;
|
|
resamplerParameters.quality = Resampler::FastestTolerable;
|
|
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
|
|
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize;
|
|
resamplerParameters.dynamism = Resampler::RatioOftenChanging;
|
|
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
|
|
|
|
int debug = m_log.getDebugLevel();
|
|
if (debug > 0) --debug;
|
|
resamplerParameters.debugLevel = debug;
|
|
|
|
m_inResampler = std::unique_ptr<Resampler>
|
|
(new Resampler(resamplerParameters, m_parameters.channels));
|
|
|
|
m_outResampler = std::unique_ptr<Resampler>
|
|
(new Resampler(resamplerParameters, m_parameters.channels));
|
|
|
|
measureResamplerDelay();
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::measureResamplerDelay()
|
|
{
|
|
// Delay in the sense that the resampler at first returns fewer
|
|
// samples than requested, not that the samples are delayed within
|
|
// the returned buffer relative to their positions at input. We
|
|
// actually add the delay ourselves in the first block because we
|
|
// need a fixed block size and the resampler doesn't return one.
|
|
|
|
int bs = getBlockSize();
|
|
std::vector<float> inbuf(bs * m_parameters.channels, 0.f);
|
|
auto outbuf = inbuf;
|
|
|
|
int incount = m_inResampler->resampleInterleaved
|
|
(outbuf.data(), bs, inbuf.data(), bs, getInRatio(), false);
|
|
m_inResampler->reset();
|
|
|
|
int outcount = m_outResampler->resampleInterleaved
|
|
(outbuf.data(), bs, inbuf.data(), bs, getOutRatio(), false);
|
|
m_outResampler->reset();
|
|
|
|
m_initialResamplerDelays = { bs - incount, bs - outcount };
|
|
|
|
m_log.log(1, "R3LiveShifter::measureResamplerDelay: inRatio, outRatio ", getInRatio(), getOutRatio());
|
|
m_log.log(1, "R3LiveShifter::measureResamplerDelay: measured delays ", m_initialResamplerDelays.first, m_initialResamplerDelays.second);
|
|
}
|
|
|
|
double
|
|
R3LiveShifter::getPitchScale() const
|
|
{
|
|
return m_pitchScale;
|
|
}
|
|
|
|
double
|
|
R3LiveShifter::getFormantScale() const
|
|
{
|
|
return m_formantScale;
|
|
}
|
|
|
|
size_t
|
|
R3LiveShifter::getPreferredStartPad() const
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
size_t
|
|
R3LiveShifter::getStartDelay() const
|
|
{
|
|
int inDelay = getWindowSourceSize() + m_initialResamplerDelays.first;
|
|
int outDelay = int(floor(inDelay * getOutRatio())) + m_initialResamplerDelays.second;
|
|
|
|
int total = outDelay;
|
|
int bs = getBlockSize();
|
|
if (m_pitchScale > 1.0) {
|
|
total += bs * (m_pitchScale - 1.0);
|
|
} else if (m_pitchScale < 1.0) {
|
|
total -= bs * (1.0 / m_pitchScale - 1.0);
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::getStartDelay: inDelay, outDelay", inDelay, outDelay);
|
|
m_log.log(1, "R3LiveShifter::getStartDelay", total);
|
|
return total;
|
|
}
|
|
|
|
size_t
|
|
R3LiveShifter::getChannelCount() const
|
|
{
|
|
return m_parameters.channels;
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::reset()
|
|
{
|
|
m_inResampler->reset();
|
|
m_outResampler->reset();
|
|
|
|
m_unityCount = 0;
|
|
|
|
m_prevInhop = m_limits.maxInhopWithReadahead / 2;
|
|
m_prevOuthop = int(round(m_prevInhop * m_pitchScale));
|
|
m_firstProcess = true;
|
|
|
|
for (auto &it : m_scaleData) {
|
|
it.second->guided.reset();
|
|
}
|
|
|
|
for (auto &cd : m_channelData) {
|
|
cd->reset();
|
|
}
|
|
|
|
measureResamplerDelay();
|
|
}
|
|
|
|
size_t
|
|
R3LiveShifter::getBlockSize() const
|
|
{
|
|
return 512;
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::shift(const float *const *input, float *const *output)
|
|
{
|
|
Profiler profiler("R3LiveShifter::shift");
|
|
|
|
int incount = int(getBlockSize());
|
|
|
|
m_log.log(2, "R3LiveShifter::shift: start of shift with incount", incount);
|
|
m_log.log(2, "R3LiveShifter::shift: initially in inbuf", m_channelData[0]->inbuf->getReadSpace());
|
|
m_log.log(2, "R3LiveShifter::shift: initially in outbuf", m_channelData[0]->outbuf->getReadSpace());
|
|
|
|
int pad = 0;
|
|
if (m_firstProcess) {
|
|
pad = getWindowSourceSize();
|
|
if (m_pitchScale > 1.0) {
|
|
pad = int(ceil(pad * m_pitchScale));
|
|
}
|
|
m_log.log(2, "R3LiveShifter::shift: extending input with pre-pad", incount, pad);
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
m_channelData[c]->inbuf->zero(pad);
|
|
}
|
|
}
|
|
|
|
readIn(input);
|
|
|
|
double outRatio = getOutRatio();
|
|
int requiredInOutbuf = int(ceil(incount / outRatio));
|
|
generate(requiredInOutbuf);
|
|
|
|
int got = readOut(output, incount);
|
|
|
|
if (got < incount) {
|
|
m_log.log(0, "R3LiveShifter::shift: ERROR: internal error: insufficient data at output (wanted, got)", incount, got);
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
for (int i = got; i < incount; ++i) {
|
|
if (i > 0) output[c][i] = output[c][i-1] * 0.9f;
|
|
else output[c][i] = 0.f;
|
|
}
|
|
}
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::shift: end of process with incount", incount);
|
|
m_log.log(2, "R3LiveShifter::shift: remaining in inbuf", m_channelData[0]->inbuf->getReadSpace());
|
|
m_log.log(2, "R3LiveShifter::shift: remaining in outbuf", m_channelData[0]->outbuf->getReadSpace());
|
|
m_log.log(2, "R3LiveShifter::shift: returning", got);
|
|
|
|
m_firstProcess = false;
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::readIn(const float *const *input)
|
|
{
|
|
int incount = int(getBlockSize());
|
|
|
|
int ws = m_channelData[0]->inbuf->getWriteSpace();
|
|
if (ws < incount) {
|
|
m_log.log(0, "R3LiveShifter::process: ERROR: internal error: insufficient space in inbuf (wanted, got)", incount, ws);
|
|
return;
|
|
}
|
|
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
auto &cd = m_channelData.at(c);
|
|
m_channelAssembly.resampled[c] = cd->resampled.data();
|
|
}
|
|
|
|
if (useMidSide()) {
|
|
auto &c0 = m_channelData.at(0)->mixdown;
|
|
auto &c1 = m_channelData.at(1)->mixdown;
|
|
for (int i = 0; i < incount; ++i) {
|
|
float l = input[0][i];
|
|
float r = input[1][i];
|
|
float m = (l + r) / 2.f;
|
|
float s = (l - r) / 2.f;
|
|
c0[i] = m;
|
|
c1[i] = s;
|
|
}
|
|
m_channelAssembly.input[0] = m_channelData.at(0)->mixdown.data();
|
|
m_channelAssembly.input[1] = m_channelData.at(1)->mixdown.data();
|
|
} else {
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
m_channelAssembly.input[c] = input[c];
|
|
}
|
|
}
|
|
|
|
double inRatio = getInRatio();
|
|
m_log.log(2, "R3LiveShifter::readIn: ratio", inRatio);
|
|
|
|
int resampleBufSize = int(m_channelData.at(0)->resampled.size());
|
|
|
|
int resampleOutput = m_inResampler->resample
|
|
(m_channelAssembly.resampled.data(),
|
|
resampleBufSize,
|
|
m_channelAssembly.input.data(),
|
|
incount,
|
|
inRatio,
|
|
false);
|
|
|
|
m_log.log(2, "R3LiveShifter::readIn: writing to inbuf from resampled data, former read space and samples being added", m_channelData[0]->inbuf->getReadSpace(), resampleOutput);
|
|
|
|
if (m_firstProcess) {
|
|
int expected = floor(incount * inRatio);
|
|
if (resampleOutput < expected) {
|
|
m_log.log(2, "R3LiveShifter::readIn: resampler left us short on first process, pre-padding output: expected and obtained", expected, resampleOutput);
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
m_channelData[c]->inbuf->zero(expected - resampleOutput);
|
|
}
|
|
}
|
|
}
|
|
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
m_channelData[c]->inbuf->write
|
|
(m_channelData.at(c)->resampled.data(),
|
|
resampleOutput);
|
|
}
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::generate(int requiredInOutbuf)
|
|
{
|
|
Profiler profiler("R3LiveShifter::generate");
|
|
|
|
auto &cd0 = m_channelData.at(0);
|
|
int alreadyGenerated = cd0->outbuf->getReadSpace();
|
|
if (alreadyGenerated >= requiredInOutbuf) {
|
|
m_log.log(2, "R3LiveShifter::generate: have already generated required count", alreadyGenerated, requiredInOutbuf);
|
|
return;
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::generate: alreadyGenerated, requiredInOutbuf", alreadyGenerated, requiredInOutbuf);
|
|
|
|
int toGenerate = requiredInOutbuf - alreadyGenerated;
|
|
|
|
int channels = m_parameters.channels;
|
|
|
|
int ws = getWindowSourceSize();
|
|
|
|
// We always leave at least ws in inbuf, and this function is
|
|
// called after some input has just been written to inbuf, so we
|
|
// must have more than ws samples in there.
|
|
|
|
int atInput = cd0->inbuf->getReadSpace();
|
|
if (atInput <= ws) {
|
|
m_log.log(2, "R3LiveShifter::generate: insufficient samples at input: have and require more than", atInput, ws);
|
|
return;
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::generate: atInput, toLeave", atInput, ws);
|
|
|
|
int toConsume = atInput - ws;
|
|
|
|
m_log.log(2, "R3LiveShifter::generate: toConsume, toGenerate", toConsume, toGenerate);
|
|
|
|
int indicativeInhop = m_limits.maxInhopWithReadahead;
|
|
int indicativeOuthop = m_limits.maxPreferredOuthop;
|
|
|
|
int minHopsAtInput = 1 + (toConsume - 1) / indicativeInhop;
|
|
int minHopsAtOutput = 1 + (toGenerate - 1) / indicativeOuthop;
|
|
|
|
int hops = std::max(minHopsAtInput, minHopsAtOutput);
|
|
|
|
m_log.log(2, "R3LiveShifter::generate: indicative inhop, outhop", indicativeInhop, indicativeOuthop);
|
|
m_log.log(2, "R3LiveShifter::generate: minHopsAtInput, minHopsAtOutput", minHopsAtInput, minHopsAtOutput);
|
|
m_log.log(2, "R3LiveShifter::generate: hops", hops);
|
|
|
|
for (int hop = 0; hop < hops; ++hop) {
|
|
|
|
Profiler profiler2("R3LiveShifter::generate/loop");
|
|
|
|
if (toConsume <= 0) {
|
|
m_log.log(2, "R3LiveShifter::generate: ERROR: toConsume is zero at top of loop, hop and hops", hop, hops);
|
|
break;
|
|
}
|
|
|
|
int inhop = 1 + (toConsume - 1) / hops;
|
|
int outhop = 1 + (toGenerate - 1) / hops;
|
|
|
|
if (inhop > toConsume) {
|
|
inhop = toConsume;
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::generate: inhop, outhop", inhop, outhop);
|
|
|
|
// Now inhop is the distance by which the input stream will be
|
|
// advanced after our current frame has been read, and outhop
|
|
// is the distance by which the output will be advanced after
|
|
// it has been emitted; m_prevInhop and m_prevOuthop are the
|
|
// corresponding values the last time a frame was processed.
|
|
//
|
|
// Our phase adjustments need to be based on the distances we
|
|
// have advanced the input and output since the previous
|
|
// frame, not the distances we are about to advance them, so
|
|
// they use the m_prev values.
|
|
|
|
if (inhop != m_prevInhop) {
|
|
m_log.log(2, "R3LiveShifter::generate: change in inhop", m_prevInhop, inhop);
|
|
}
|
|
if (outhop != m_prevOuthop) {
|
|
m_log.log(2, "R3LiveShifter::generate: change in outhop", m_prevOuthop, outhop);
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::generate: write space and outhop", cd0->outbuf->getWriteSpace(), outhop);
|
|
|
|
// NB our ChannelData, ScaleData, and ChannelScaleData maps
|
|
// contain shared_ptrs; whenever we retain one of them in a
|
|
// variable, we do so by reference to avoid copying the
|
|
// shared_ptr (as that is not realtime safe). Same goes for
|
|
// the map iterators
|
|
|
|
// Analysis
|
|
|
|
for (int c = 0; c < channels; ++c) {
|
|
analyseChannel(c, inhop, m_prevInhop, m_prevOuthop);
|
|
}
|
|
|
|
// Phase update. This is synchronised across all channels
|
|
|
|
for (auto &it : m_channelData[0]->scales) {
|
|
int fftSize = it.first;
|
|
for (int c = 0; c < channels; ++c) {
|
|
auto &cd = m_channelData.at(c);
|
|
auto &scale = cd->scales.at(fftSize);
|
|
m_channelAssembly.mag[c] = scale->mag.data();
|
|
m_channelAssembly.phase[c] = scale->phase.data();
|
|
m_channelAssembly.prevMag[c] = scale->prevMag.data();
|
|
m_channelAssembly.guidance[c] = &cd->guidance;
|
|
m_channelAssembly.outPhase[c] = scale->advancedPhase.data();
|
|
}
|
|
m_scaleData.at(fftSize)->guided.advance
|
|
(m_channelAssembly.outPhase.data(),
|
|
m_channelAssembly.mag.data(),
|
|
m_channelAssembly.phase.data(),
|
|
m_channelAssembly.prevMag.data(),
|
|
m_guideConfiguration,
|
|
m_channelAssembly.guidance.data(),
|
|
useMidSide(),
|
|
m_prevInhop,
|
|
m_prevOuthop);
|
|
}
|
|
|
|
for (int c = 0; c < channels; ++c) {
|
|
adjustPreKick(c);
|
|
}
|
|
|
|
// Resynthesis
|
|
|
|
for (int c = 0; c < channels; ++c) {
|
|
synthesiseChannel(c, outhop, false);
|
|
}
|
|
|
|
// Emit
|
|
|
|
int writeCount = outhop;
|
|
int advanceCount = inhop;
|
|
|
|
for (int c = 0; c < channels; ++c) {
|
|
auto &cd = m_channelData.at(c);
|
|
cd->outbuf->write(cd->mixdown.data(), writeCount);
|
|
cd->inbuf->skip(advanceCount);
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::generate: writing and advancing", writeCount, advanceCount);
|
|
|
|
m_prevInhop = inhop;
|
|
m_prevOuthop = outhop;
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::generate: returning, now have generated", cd0->outbuf->getReadSpace());
|
|
}
|
|
|
|
int
|
|
R3LiveShifter::readOut(float *const *output, int outcount)
|
|
{
|
|
double outRatio = getOutRatio();
|
|
m_log.log(2, "R3LiveShifter::readOut: outcount and ratio", outcount, outRatio);
|
|
|
|
int resampledCount = 0;
|
|
bool fillingTail = false;
|
|
|
|
while (resampledCount < outcount) {
|
|
|
|
int fromOutbuf;
|
|
|
|
if (fillingTail) {
|
|
fromOutbuf = 1;
|
|
} else {
|
|
fromOutbuf = int(floor(outcount / outRatio));
|
|
if (fromOutbuf == 0) {
|
|
fromOutbuf = 1;
|
|
}
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::readOut: fillingTail and fromOutbuf", fillingTail, fromOutbuf);
|
|
|
|
int got = fromOutbuf;
|
|
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
auto &cd = m_channelData.at(c);
|
|
int available = cd->outbuf->getReadSpace();
|
|
int gotHere = cd->outbuf->read
|
|
(cd->resampled.data(), std::min(got, available));
|
|
if (gotHere < got) {
|
|
if (c > 0) {
|
|
m_log.log(0, "R3LiveShifter::readOut: WARNING: channel imbalance detected");
|
|
}
|
|
}
|
|
got = std::min(got, std::max(gotHere, 0));
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::readOut: requested and got from outbufs", fromOutbuf, got);
|
|
m_log.log(2, "R3LiveShifter::readOut: leaving behind", m_channelData.at(0)->outbuf->getReadSpace());
|
|
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
auto &cd = m_channelData.at(c);
|
|
m_channelAssembly.resampled[c] = cd->resampled.data();
|
|
m_channelAssembly.mixdown[c] = output[c] + resampledCount;
|
|
}
|
|
|
|
auto resampledHere = m_outResampler->resample
|
|
(m_channelAssembly.mixdown.data(),
|
|
outcount - resampledCount,
|
|
m_channelAssembly.resampled.data(),
|
|
got,
|
|
outRatio,
|
|
false);
|
|
|
|
m_log.log(2, "R3LiveShifter::readOut: resampledHere", resampledHere);
|
|
|
|
if (got == 0 && resampledHere == 0) {
|
|
m_log.log(2, "R3LiveShifter::readOut: made no progress, finishing");
|
|
break;
|
|
}
|
|
|
|
resampledCount += resampledHere;
|
|
|
|
fillingTail = true;
|
|
}
|
|
|
|
if (useMidSide()) {
|
|
for (int i = 0; i < resampledCount; ++i) {
|
|
float m = output[0][i];
|
|
float s = output[1][i];
|
|
float l = m + s;
|
|
float r = m - s;
|
|
output[0][i] = l;
|
|
output[1][i] = r;
|
|
}
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::readOut: resampled to", resampledCount);
|
|
|
|
if (resampledCount < outcount) {
|
|
if (m_firstProcess) {
|
|
m_log.log(2, "R3LiveShifter::readOut: resampler left us short on first process, pre-padding output: expected and obtained", outcount, resampledCount);
|
|
int prepad = outcount - resampledCount;
|
|
for (int c = 0; c < m_parameters.channels; ++c) {
|
|
v_move(output[c] + prepad, output[c], resampledCount);
|
|
v_zero(output[c], prepad);
|
|
}
|
|
resampledCount = outcount;
|
|
} else {
|
|
m_log.log(0, "R3LiveShifter::readOut: WARNING: Failed to obtain enough samples from resampler", resampledCount, outcount);
|
|
}
|
|
}
|
|
|
|
m_log.log(2, "R3LiveShifter::readOut: returning", resampledCount);
|
|
|
|
return resampledCount;
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::analyseChannel(int c, int inhop, int prevInhop, int prevOuthop)
|
|
{
|
|
Profiler profiler("R3LiveShifter::analyseChannel");
|
|
|
|
auto &cd = m_channelData.at(c);
|
|
|
|
int sourceSize = cd->windowSource.size();
|
|
process_t *buf = cd->windowSource.data();
|
|
|
|
int readSpace = cd->inbuf->getReadSpace();
|
|
if (readSpace < sourceSize) {
|
|
cd->inbuf->peek(buf, readSpace);
|
|
v_zero(buf + readSpace, sourceSize - readSpace);
|
|
} else {
|
|
cd->inbuf->peek(buf, sourceSize);
|
|
}
|
|
|
|
// We have an unwindowed time-domain frame in buf that is as long
|
|
// as required for the union of all FFT sizes and readahead
|
|
// hops. Populate the various sizes from it with aligned centres,
|
|
// windowing as we copy. The classification scale is handled
|
|
// separately because it has readahead, so skip it here. (In
|
|
// single-window mode that means we do nothing here, since the
|
|
// classification scale is the only one.)
|
|
|
|
int longest = m_guideConfiguration.longestFftSize;
|
|
int classify = m_guideConfiguration.classificationFftSize;
|
|
|
|
for (auto &it: cd->scales) {
|
|
int fftSize = it.first;
|
|
if (fftSize == classify) continue;
|
|
int offset = (longest - fftSize) / 2;
|
|
m_scaleData.at(fftSize)->analysisWindow.cut
|
|
(buf + offset, it.second->timeDomain.data());
|
|
}
|
|
|
|
auto &classifyScale = cd->scales.at(classify);
|
|
ClassificationReadaheadData &readahead = cd->readahead;
|
|
bool copyFromReadahead = false;
|
|
|
|
if (m_useReadahead) {
|
|
|
|
// The classification scale has a one-hop readahead, so
|
|
// populate the readahead from further down the long
|
|
// unwindowed frame.
|
|
|
|
m_scaleData.at(classify)->analysisWindow.cut
|
|
(buf + (longest - classify) / 2 + inhop,
|
|
readahead.timeDomain.data());
|
|
|
|
// If inhop has changed since the previous frame, we must
|
|
// populate the classification scale (but for
|
|
// analysis/resynthesis rather than classification) anew
|
|
// rather than reuse the previous frame's readahead.
|
|
|
|
copyFromReadahead = cd->haveReadahead;
|
|
if (inhop != prevInhop) copyFromReadahead = false;
|
|
}
|
|
|
|
if (!copyFromReadahead) {
|
|
m_scaleData.at(classify)->analysisWindow.cut
|
|
(buf + (longest - classify) / 2,
|
|
classifyScale->timeDomain.data());
|
|
}
|
|
|
|
// FFT shift, forward FFT, and carry out cartesian-polar
|
|
// conversion for each FFT size.
|
|
|
|
// For the classification scale we need magnitudes for the full
|
|
// range (polar only in a subset) and we operate in the readahead,
|
|
// pulling current values from the existing readahead (except
|
|
// where the inhop has changed as above, in which case we need to
|
|
// do both readahead and current)
|
|
|
|
if (m_useReadahead) {
|
|
|
|
if (copyFromReadahead) {
|
|
v_copy(classifyScale->mag.data(),
|
|
readahead.mag.data(),
|
|
classifyScale->bufSize);
|
|
v_copy(classifyScale->phase.data(),
|
|
readahead.phase.data(),
|
|
classifyScale->bufSize);
|
|
}
|
|
|
|
v_fftshift(readahead.timeDomain.data(), classify);
|
|
m_scaleData.at(classify)->fft.forward(readahead.timeDomain.data(),
|
|
classifyScale->real.data(),
|
|
classifyScale->imag.data());
|
|
|
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
|
if (band.fftSize == classify) {
|
|
ToPolarSpec spec;
|
|
spec.magFromBin = 0;
|
|
spec.magBinCount = classify/2 + 1;
|
|
spec.polarFromBin = band.b0min;
|
|
spec.polarBinCount = band.b1max - band.b0min + 1;
|
|
convertToPolar(readahead.mag.data(),
|
|
readahead.phase.data(),
|
|
classifyScale->real.data(),
|
|
classifyScale->imag.data(),
|
|
spec);
|
|
|
|
v_scale(classifyScale->mag.data(),
|
|
1.0 / double(classify),
|
|
classifyScale->mag.size());
|
|
break;
|
|
}
|
|
}
|
|
|
|
cd->haveReadahead = true;
|
|
}
|
|
|
|
// For the others (and the classify as well, if the inhop has
|
|
// changed or we aren't using readahead or haven't filled the
|
|
// readahead yet) we operate directly in the scale data and
|
|
// restrict the range for cartesian-polar conversion
|
|
|
|
for (auto &it: cd->scales) {
|
|
int fftSize = it.first;
|
|
if (fftSize == classify && copyFromReadahead) {
|
|
continue;
|
|
}
|
|
|
|
auto &scale = it.second;
|
|
|
|
v_fftshift(scale->timeDomain.data(), fftSize);
|
|
|
|
m_scaleData.at(fftSize)->fft.forward(scale->timeDomain.data(),
|
|
scale->real.data(),
|
|
scale->imag.data());
|
|
|
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
|
if (band.fftSize == fftSize) {
|
|
|
|
ToPolarSpec spec;
|
|
|
|
// For the classify scale we always want the full
|
|
// range, as all the magnitudes (though not
|
|
// necessarily all phases) are potentially relevant to
|
|
// classification and formant analysis. But this case
|
|
// here only happens if we don't copyFromReadahead -
|
|
// the normal case is above and, er, copies from the
|
|
// previous readahead.
|
|
if (fftSize == classify) {
|
|
spec.magFromBin = 0;
|
|
spec.magBinCount = classify/2 + 1;
|
|
spec.polarFromBin = band.b0min;
|
|
spec.polarBinCount = band.b1max - band.b0min + 1;
|
|
} else {
|
|
spec.magFromBin = band.b0min;
|
|
spec.magBinCount = band.b1max - band.b0min + 1;
|
|
spec.polarFromBin = spec.magFromBin;
|
|
spec.polarBinCount = spec.magBinCount;
|
|
}
|
|
|
|
convertToPolar(scale->mag.data(),
|
|
scale->phase.data(),
|
|
scale->real.data(),
|
|
scale->imag.data(),
|
|
spec);
|
|
|
|
v_scale(scale->mag.data() + spec.magFromBin,
|
|
1.0 / double(fftSize),
|
|
spec.magBinCount);
|
|
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (m_parameters.options & RubberBandLiveShifter::OptionFormantPreserved) {
|
|
analyseFormant(c);
|
|
adjustFormant(c);
|
|
}
|
|
|
|
// Use the classification scale to get a bin segmentation and
|
|
// calculate the adaptive frequency guide for this channel
|
|
|
|
v_copy(cd->classification.data(), cd->nextClassification.data(),
|
|
cd->classification.size());
|
|
|
|
if (m_useReadahead) {
|
|
cd->classifier->classify(readahead.mag.data(),
|
|
cd->nextClassification.data());
|
|
} else {
|
|
cd->classifier->classify(classifyScale->mag.data(),
|
|
cd->nextClassification.data());
|
|
}
|
|
|
|
cd->prevSegmentation = cd->segmentation;
|
|
cd->segmentation = cd->nextSegmentation;
|
|
cd->nextSegmentation = cd->segmenter->segment(cd->nextClassification.data());
|
|
/*
|
|
if (c == 0) {
|
|
double pb = cd->nextSegmentation.percussiveBelow;
|
|
double pa = cd->nextSegmentation.percussiveAbove;
|
|
double ra = cd->nextSegmentation.residualAbove;
|
|
int pbb = binForFrequency(pb, classify, m_parameters.sampleRate);
|
|
int pab = binForFrequency(pa, classify, m_parameters.sampleRate);
|
|
int rab = binForFrequency(ra, classify, m_parameters.sampleRate);
|
|
std::cout << "pb = " << pb << ", pbb = " << pbb << std::endl;
|
|
std::cout << "pa = " << pa << ", pab = " << pab << std::endl;
|
|
std::cout << "ra = " << ra << ", rab = " << rab << std::endl;
|
|
std::cout << "s:";
|
|
for (int i = 0; i <= classify/2; ++i) {
|
|
if (i > 0) std::cout << ",";
|
|
if (i < pbb || (i >= pab && i <= rab)) {
|
|
std::cout << "1";
|
|
} else {
|
|
std::cout << "0";
|
|
}
|
|
}
|
|
std::cout << std::endl;
|
|
}
|
|
*/
|
|
|
|
double ratio = m_pitchScale;
|
|
|
|
if (fabs(ratio - 1.0) < 1.0e-7) {
|
|
++m_unityCount;
|
|
} else {
|
|
m_unityCount = 0;
|
|
}
|
|
|
|
bool tighterChannelLock =
|
|
m_parameters.options & RubberBandLiveShifter::OptionChannelsTogether;
|
|
|
|
double magMean = v_mean(classifyScale->mag.data() + 1, classify/2);
|
|
|
|
bool resetOnSilence = true;
|
|
if (useMidSide() && c == 1) {
|
|
// Do not phase reset on silence in the side channel - the
|
|
// reset is propagated across to the mid channel, giving
|
|
// constant resets for e.g. mono material in a stereo
|
|
// configuration
|
|
resetOnSilence = false;
|
|
}
|
|
|
|
if (m_useReadahead) {
|
|
m_guide.updateGuidance(ratio,
|
|
prevOuthop,
|
|
classifyScale->mag.data(),
|
|
classifyScale->prevMag.data(),
|
|
cd->readahead.mag.data(),
|
|
cd->segmentation,
|
|
cd->prevSegmentation,
|
|
cd->nextSegmentation,
|
|
magMean,
|
|
m_unityCount,
|
|
true,
|
|
tighterChannelLock,
|
|
resetOnSilence,
|
|
cd->guidance);
|
|
} else {
|
|
m_guide.updateGuidance(ratio,
|
|
prevOuthop,
|
|
classifyScale->prevMag.data(),
|
|
classifyScale->prevMag.data(),
|
|
classifyScale->mag.data(),
|
|
cd->segmentation,
|
|
cd->prevSegmentation,
|
|
cd->nextSegmentation,
|
|
magMean,
|
|
m_unityCount,
|
|
true,
|
|
tighterChannelLock,
|
|
resetOnSilence,
|
|
cd->guidance);
|
|
}
|
|
|
|
/*
|
|
if (c == 0) {
|
|
if (cd->guidance.kick.present) {
|
|
std::cout << "k:2" << std::endl;
|
|
} else if (cd->guidance.preKick.present) {
|
|
std::cout << "k:1" << std::endl;
|
|
} else {
|
|
std::cout << "k:0" << std::endl;
|
|
}
|
|
}
|
|
*/
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::analyseFormant(int c)
|
|
{
|
|
Profiler profiler("R3LiveShifter::analyseFormant");
|
|
|
|
auto &cd = m_channelData.at(c);
|
|
auto &f = *cd->formant;
|
|
|
|
int fftSize = f.fftSize;
|
|
int binCount = fftSize/2 + 1;
|
|
|
|
auto &scale = cd->scales.at(fftSize);
|
|
auto &scaleData = m_scaleData.at(fftSize);
|
|
|
|
scaleData->fft.inverseCepstral(scale->mag.data(), f.cepstra.data());
|
|
|
|
int cutoff = int(floor(m_parameters.sampleRate / 650.0));
|
|
if (cutoff < 1) cutoff = 1;
|
|
|
|
f.cepstra[0] /= 2.0;
|
|
f.cepstra[cutoff-1] /= 2.0;
|
|
for (int i = cutoff; i < fftSize; ++i) {
|
|
f.cepstra[i] = 0.0;
|
|
}
|
|
v_scale(f.cepstra.data(), 1.0 / double(fftSize), cutoff);
|
|
|
|
scaleData->fft.forward(f.cepstra.data(), f.envelope.data(), f.spare.data());
|
|
|
|
v_exp(f.envelope.data(), binCount);
|
|
v_square(f.envelope.data(), binCount);
|
|
|
|
for (int i = 0; i < binCount; ++i) {
|
|
if (f.envelope[i] > 1.0e10) f.envelope[i] = 1.0e10;
|
|
}
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::adjustFormant(int c)
|
|
{
|
|
Profiler profiler("R3LiveShifter::adjustFormant");
|
|
|
|
auto &cd = m_channelData.at(c);
|
|
|
|
for (auto &it : cd->scales) {
|
|
|
|
int fftSize = it.first;
|
|
auto &scale = it.second;
|
|
|
|
int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate));
|
|
process_t targetFactor = process_t(cd->formant->fftSize) / process_t(fftSize);
|
|
process_t formantScale = m_formantScale;
|
|
if (formantScale == 0.0) formantScale = 1.0 / m_pitchScale;
|
|
process_t sourceFactor = targetFactor / formantScale;
|
|
process_t maxRatio = 60.0;
|
|
process_t minRatio = 1.0 / maxRatio;
|
|
|
|
for (int b = 0; b < m_guideConfiguration.fftBandLimitCount; ++b) {
|
|
const auto &band = m_guideConfiguration.fftBandLimits[b];
|
|
if (band.fftSize != fftSize) continue;
|
|
for (int i = band.b0min; i < band.b1max && i < highBin; ++i) {
|
|
process_t source = cd->formant->envelopeAt(i * sourceFactor);
|
|
process_t target = cd->formant->envelopeAt(i * targetFactor);
|
|
if (target > 0.0) {
|
|
process_t ratio = source / target;
|
|
if (ratio < minRatio) ratio = minRatio;
|
|
if (ratio > maxRatio) ratio = maxRatio;
|
|
scale->mag[i] *= ratio;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::adjustPreKick(int c)
|
|
{
|
|
if (isSingleWindowed()) return;
|
|
|
|
Profiler profiler("R3LiveShifter::adjustPreKick");
|
|
|
|
auto &cd = m_channelData.at(c);
|
|
auto fftSize = cd->guidance.fftBands[0].fftSize;
|
|
if (cd->guidance.preKick.present) {
|
|
auto &scale = cd->scales.at(fftSize);
|
|
int from = binForFrequency(cd->guidance.preKick.f0,
|
|
fftSize, m_parameters.sampleRate);
|
|
int to = binForFrequency(cd->guidance.preKick.f1,
|
|
fftSize, m_parameters.sampleRate);
|
|
for (int i = from; i <= to; ++i) {
|
|
process_t diff = scale->mag[i] - scale->prevMag[i];
|
|
if (diff > 0.0) {
|
|
scale->pendingKick[i] = diff;
|
|
scale->mag[i] -= diff;
|
|
}
|
|
}
|
|
} else if (cd->guidance.kick.present) {
|
|
auto &scale = cd->scales.at(fftSize);
|
|
int from = binForFrequency(cd->guidance.preKick.f0,
|
|
fftSize, m_parameters.sampleRate);
|
|
int to = binForFrequency(cd->guidance.preKick.f1,
|
|
fftSize, m_parameters.sampleRate);
|
|
for (int i = from; i <= to; ++i) {
|
|
scale->mag[i] += scale->pendingKick[i];
|
|
scale->pendingKick[i] = 0.0;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
R3LiveShifter::synthesiseChannel(int c, int outhop, bool draining)
|
|
{
|
|
Profiler profiler("R3LiveShifter::synthesiseChannel");
|
|
|
|
int longest = m_guideConfiguration.longestFftSize;
|
|
|
|
auto &cd = m_channelData.at(c);
|
|
|
|
for (int b = 0; b < cd->guidance.fftBandCount; ++b) {
|
|
|
|
const auto &band = cd->guidance.fftBands[b];
|
|
int fftSize = band.fftSize;
|
|
|
|
auto &scale = cd->scales.at(fftSize);
|
|
auto &scaleData = m_scaleData.at(fftSize);
|
|
|
|
// copy to prevMag before filtering
|
|
v_copy(scale->prevMag.data(),
|
|
scale->mag.data(),
|
|
scale->bufSize);
|
|
|
|
process_t winscale = process_t(outhop) / scaleData->windowScaleFactor;
|
|
|
|
m_log.log(2, "R3LiveShifter::synthesiseChannel: outhop and winscale", outhop, winscale);
|
|
|
|
// The frequency filter is applied naively in the frequency
|
|
// domain. Aliasing is reduced by the shorter resynthesis
|
|
// window. We resynthesise each scale individually, then sum -
|
|
// it's easier to manage scaling for in situations with a
|
|
// varying resynthesis hop
|
|
|
|
int lowBin = binForFrequency(band.f0, fftSize, m_parameters.sampleRate);
|
|
int highBin = binForFrequency(band.f1, fftSize, m_parameters.sampleRate);
|
|
if (highBin % 2 == 0 && highBin > 0) --highBin;
|
|
|
|
int n = scale->mag.size();
|
|
if (lowBin >= n) lowBin = n - 1;
|
|
if (highBin >= n) highBin = n - 1;
|
|
if (highBin < lowBin) highBin = lowBin;
|
|
|
|
if (lowBin > 0) {
|
|
v_zero(scale->real.data(), lowBin);
|
|
v_zero(scale->imag.data(), lowBin);
|
|
}
|
|
|
|
v_scale(scale->mag.data() + lowBin, winscale, highBin - lowBin);
|
|
|
|
v_polar_to_cartesian(scale->real.data() + lowBin,
|
|
scale->imag.data() + lowBin,
|
|
scale->mag.data() + lowBin,
|
|
scale->advancedPhase.data() + lowBin,
|
|
highBin - lowBin);
|
|
|
|
if (highBin < scale->bufSize) {
|
|
v_zero(scale->real.data() + highBin, scale->bufSize - highBin);
|
|
v_zero(scale->imag.data() + highBin, scale->bufSize - highBin);
|
|
}
|
|
|
|
scaleData->fft.inverse(scale->real.data(),
|
|
scale->imag.data(),
|
|
scale->timeDomain.data());
|
|
|
|
v_fftshift(scale->timeDomain.data(), fftSize);
|
|
|
|
// Synthesis window may be shorter than analysis window, so
|
|
// copy and cut only from the middle of the time-domain frame;
|
|
// and the accumulator length always matches the longest FFT
|
|
// size, so as to make mixing straightforward, so there is an
|
|
// additional offset needed for the target
|
|
|
|
int synthesisWindowSize = scaleData->synthesisWindow.getSize();
|
|
int fromOffset = (fftSize - synthesisWindowSize) / 2;
|
|
int toOffset = (longest - synthesisWindowSize) / 2;
|
|
|
|
scaleData->synthesisWindow.cutAndAdd
|
|
(scale->timeDomain.data() + fromOffset,
|
|
scale->accumulator.data() + toOffset);
|
|
}
|
|
|
|
// Mix this channel and move the accumulator along
|
|
|
|
float *mixptr = cd->mixdown.data();
|
|
v_zero(mixptr, outhop);
|
|
|
|
for (auto &it : cd->scales) {
|
|
auto &scale = it.second;
|
|
|
|
process_t *accptr = scale->accumulator.data();
|
|
for (int i = 0; i < outhop; ++i) {
|
|
mixptr[i] += float(accptr[i]);
|
|
}
|
|
|
|
int n = scale->accumulator.size() - outhop;
|
|
v_move(accptr, accptr + outhop, n);
|
|
v_zero(accptr + n, outhop);
|
|
|
|
if (draining) {
|
|
if (scale->accumulatorFill > outhop) {
|
|
auto newFill = scale->accumulatorFill - outhop;
|
|
m_log.log(2, "draining: reducing accumulatorFill from, to", scale->accumulatorFill, newFill);
|
|
scale->accumulatorFill = newFill;
|
|
} else {
|
|
scale->accumulatorFill = 0;
|
|
}
|
|
} else {
|
|
scale->accumulatorFill = scale->accumulator.size();
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|