Files
librubberband/src/finer/R3LiveShifter.h
2024-04-19 15:12:25 +01:00

416 lines
15 KiB
C++

/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
/*
Rubber Band Library
An audio time-stretching and pitch-shifting library.
Copyright 2007-2024 Particular Programs Ltd.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version. See the file
COPYING included with this distribution for more information.
Alternatively, if you have a valid commercial licence for the
Rubber Band Library obtained by agreement with the copyright
holders, you may redistribute and/or modify it under the terms
described in that licence.
If you wish to distribute code using the Rubber Band Library
under terms other than those of the GNU General Public License,
you must obtain a valid commercial licence before doing so.
*/
#ifndef RUBBERBAND_R3_LIVE_SHIFTERIMPL_H
#define RUBBERBAND_R3_LIVE_SHIFTERIMPL_H
#include "BinSegmenter.h"
#include "Guide.h"
#include "Peak.h"
#include "PhaseAdvance.h"
#include "../common/Resampler.h"
#include "../common/FFT.h"
#include "../common/FixedVector.h"
#include "../common/Allocators.h"
#include "../common/Window.h"
#include "../common/VectorOpsComplex.h"
#include "../common/Log.h"
#include "../../rubberband/RubberBandLiveShifter.h"
#include <map>
#include <memory>
#include <atomic>
namespace RubberBand
{
class R3LiveShifter
{
public:
// Construction-time settings for the shifter: the sample rate, the
// channel count and the RubberBandLiveShifter option flags.
struct Parameters {
double sampleRate;
int channels;
RubberBandLiveShifter::Options options;
// Stores the three arguments exactly as given; no range checking
// happens in this constructor.
Parameters(double _sampleRate, int _channels,
RubberBandLiveShifter::Options _options) :
sampleRate(_sampleRate), channels(_channels), options(_options) { }
};
// Public interface. Apart from the destructor, everything in this group
// is declared only; the definitions are not part of this header section.

// Construct from a Parameters bundle and a Log, both taken by value.
R3LiveShifter(Parameters parameters, Log log);
// Empty body: the data members declared in this class are values,
// standard containers or smart pointers, so nothing is released by hand.
~R3LiveShifter() { }
void reset();
// Pitch and formant scale accessors. NOTE(review): presumably these
// write/read the std::atomic<double> members m_pitchScale and
// m_formantScale - confirm in the implementation.
void setPitchScale(double scale);
void setFormantScale(double scale);
double getPitchScale() const;
double getFormantScale() const;
void setFormantOption(RubberBandLiveShifter::Options);
size_t getBlockSize() const;
// input is an array of pointers to read-only float data and output an
// array of pointers to writable float data. NOTE(review): presumably
// one pointer per channel, each addressing getBlockSize() samples -
// confirm against the implementation.
void shift(const float *const *input, float *const *output);
size_t getPreferredStartPad() const;
size_t getStartDelay() const;
size_t getChannelCount() const;
// Apply one debug level to the logger, to the guided phase-advance object
// of every entry in m_scaleData, and to the guide, in that order.
void setDebugLevel(int level) {
    m_log.setDebugLevel(level);

    for (auto it = m_scaleData.begin(); it != m_scaleData.end(); ++it) {
        it->second->guided.setDebugLevel(level);
    }

    m_guide.setDebugLevel(level);
}
protected:
// Hop-size bounds derived from the sample rate and the window option.
// Two sets of values exist: one used when OptionWindowLong is set and
// one used when it is not.
struct Limits {
    int minPreferredOuthop;
    int maxPreferredOuthop;
    int minInhop;
    int maxInhopWithReadahead;
    int maxInhop;

    // options: shifter option flags; only OptionWindowLong is examined.
    // rate:    sample rate handed to roundUpDiv() with fixed divisors.
    Limits(RubberBandLiveShifter::Options options, double rate) :
        minInhop(1)
    {
        // commented values are results when rate = 44100 or 48000
        if (options & RubberBandLiveShifter::OptionWindowLong) {
            minPreferredOuthop = roundUpDiv(rate, 512); // 128
            maxPreferredOuthop = roundUpDiv(rate, 128); // 512
            maxInhopWithReadahead = roundUpDiv(rate, 64); // 1024
            maxInhop = roundUpDiv(rate, 32); // 2048
        } else {
            minPreferredOuthop = roundUpDiv(rate, 256); // 256
            maxPreferredOuthop = (roundUpDiv(rate, 128) * 5) / 4; // 640
            maxInhopWithReadahead = roundUpDiv(rate, 128); // 512
            maxInhop = (roundUpDiv(rate, 64) * 3) / 2; // 1536
        }
    }
};
// Buffers for one classification readahead frame: a time-domain array
// of _fftSize elements plus magnitude and phase arrays of
// _fftSize/2 + 1 elements each, all constructed with an initial value
// of 0.
struct ClassificationReadaheadData {
FixedVector<process_t> timeDomain;
FixedVector<process_t> mag;
FixedVector<process_t> phase;
ClassificationReadaheadData(int _fftSize) :
timeDomain(_fftSize, 0.f),
mag(_fftSize/2 + 1, 0.f),
phase(_fftSize/2 + 1, 0.f)
{ }
private:
// Copying is disabled.
ClassificationReadaheadData(const ClassificationReadaheadData &) =delete;
ClassificationReadaheadData &operator=(const ClassificationReadaheadData &) =delete;
};
struct ChannelScaleData {
int fftSize;
int bufSize; // size of every freq-domain array here: fftSize/2 + 1
FixedVector<process_t> timeDomain;
FixedVector<process_t> real;
FixedVector<process_t> imag;
FixedVector<process_t> mag;
FixedVector<process_t> phase;
FixedVector<process_t> advancedPhase;
FixedVector<process_t> prevMag;
FixedVector<process_t> pendingKick;
FixedVector<process_t> accumulator;
int accumulatorFill;
ChannelScaleData(int _fftSize, int _longestFftSize) :
fftSize(_fftSize),
bufSize(fftSize/2 + 1),
timeDomain(fftSize, 0.f),
real(bufSize, 0.f),
imag(bufSize, 0.f),
mag(bufSize, 0.f),
phase(bufSize, 0.f),
advancedPhase(bufSize, 0.f),
prevMag(bufSize, 0.f),
pendingKick(bufSize, 0.f),
accumulator(_longestFftSize, 0.f),
accumulatorFill(0)
{ }
void reset() {
v_zero(prevMag.data(), prevMag.size());
v_zero(pendingKick.data(), pendingKick.size());
v_zero(accumulator.data(), accumulator.size());
accumulatorFill = 0;
}
private:
ChannelScaleData(const ChannelScaleData &) =delete;
ChannelScaleData &operator=(const ChannelScaleData &) =delete;
};
// Formant-processing buffers for one channel: a cepstrum array of
// _fftSize elements plus envelope and spare arrays of _fftSize/2 + 1
// elements each.
struct FormantData {
    int fftSize;
    FixedVector<process_t> cepstra;
    FixedVector<process_t> envelope;
    FixedVector<process_t> spare;

    FormantData(int _fftSize) :
        fftSize(_fftSize),
        cepstra(_fftSize, 0.0),
        envelope(_fftSize/2 + 1, 0.0),
        spare(_fftSize/2 + 1, 0.0)
    { }

    // Read the envelope at a fractional bin position.
    //
    // Returns 0 if floor(bin) lies outside [0, fftSize/2]. Returns the
    // stored value at floor(bin) if bin is a whole number or ceil(bin)
    // would exceed fftSize/2. Otherwise linearly interpolates between
    // the two neighbouring bins.
    process_t envelopeAt(process_t bin) const {
        const int topBin = fftSize/2;
        const int lower = int(floor(bin));
        const int upper = int(ceil(bin));

        if (lower < 0 || lower > topBin) {
            return 0.0;
        }
        if (upper == lower || upper > topBin) {
            return envelope.at(lower);
        }

        const process_t frac = bin - process_t(lower);
        return envelope.at(lower) * (1.0 - frac) + envelope.at(upper) * frac;
    }
};
struct ChannelData {
std::map<int, std::shared_ptr<ChannelScaleData>> scales;
FixedVector<process_t> windowSource;
ClassificationReadaheadData readahead;
bool haveReadahead;
std::unique_ptr<BinClassifier> classifier;
FixedVector<BinClassifier::Classification> classification;
FixedVector<BinClassifier::Classification> nextClassification;
std::unique_ptr<BinSegmenter> segmenter;
BinSegmenter::Segmentation segmentation;
BinSegmenter::Segmentation prevSegmentation;
BinSegmenter::Segmentation nextSegmentation;
Guide::Guidance guidance;
FixedVector<float> mixdown;
FixedVector<float> resampled;
std::unique_ptr<RingBuffer<float>> inbuf;
std::unique_ptr<RingBuffer<float>> outbuf;
std::unique_ptr<FormantData> formant;
ChannelData(BinSegmenter::Parameters segmenterParameters,
BinClassifier::Parameters classifierParameters,
int longestFftSize,
int windowSourceSize,
int inRingBufferSize,
int outRingBufferSize) :
scales(),
windowSource(windowSourceSize, 0.0),
readahead(segmenterParameters.fftSize),
haveReadahead(false),
classifier(new BinClassifier(classifierParameters)),
classification(classifierParameters.binCount,
BinClassifier::Classification::Residual),
nextClassification(classifierParameters.binCount,
BinClassifier::Classification::Residual),
segmenter(new BinSegmenter(segmenterParameters)),
segmentation(), prevSegmentation(), nextSegmentation(),
mixdown(longestFftSize, 0.f),
resampled(outRingBufferSize, 0.f),
inbuf(new RingBuffer<float>(inRingBufferSize)),
outbuf(new RingBuffer<float>(outRingBufferSize)),
formant(new FormantData(segmenterParameters.fftSize)) { }
void reset() {
haveReadahead = false;
classifier->reset();
segmentation = BinSegmenter::Segmentation();
prevSegmentation = BinSegmenter::Segmentation();
nextSegmentation = BinSegmenter::Segmentation();
for (size_t i = 0; i < nextClassification.size(); ++i) {
nextClassification[i] = BinClassifier::Classification::Residual;
}
inbuf->reset();
outbuf->reset();
for (auto &s : scales) {
s.second->reset();
}
}
};
struct ChannelAssembly {
// Vectors of bare pointers, used to package container data
// from different channels into arguments for PhaseAdvance
FixedVector<const float *> input;
FixedVector<process_t *> mag;
FixedVector<process_t *> phase;
FixedVector<process_t *> prevMag;
FixedVector<Guide::Guidance *> guidance;
FixedVector<process_t *> outPhase;
FixedVector<float *> mixdown;
FixedVector<float *> resampled;
// Every vector is constructed with `channels` entries, all nullptr.
// This struct does not allocate any pointed-to data itself.
ChannelAssembly(int channels) :
input(channels, nullptr),
mag(channels, nullptr), phase(channels, nullptr),
prevMag(channels, nullptr), guidance(channels, nullptr),
outPhase(channels, nullptr), mixdown(channels, nullptr),
resampled(channels, nullptr) { }
};
// Channel-independent resources for one FFT size: the FFT object, the
// analysis and synthesis windows, the scale factor derived from their
// overlap, and the guided phase-advance object.
struct ScaleData {
    int fftSize;
    bool singleWindowMode;
    FFT fft;
    Window<process_t> analysisWindow;
    Window<process_t> synthesisWindow;
    process_t windowScaleFactor;
    GuidedPhaseAdvance guided;

    // The window shape/length helpers are called from the initialiser
    // list. Members are initialised in declaration order, so fftSize
    // and singleWindowMode are already set when those helpers run.
    ScaleData(GuidedPhaseAdvance::Parameters guidedParameters,
              Log log) :
        fftSize(guidedParameters.fftSize),
        singleWindowMode(guidedParameters.singleWindowMode),
        fft(fftSize),
        analysisWindow(analysisWindowShape(),
                       analysisWindowLength()),
        synthesisWindow(synthesisWindowShape(),
                        synthesisWindowLength()),
        windowScaleFactor(0.0),
        guided(guidedParameters, log)
    {
        // Sum analysis * synthesis over the synthesis window, with the
        // synthesis window centred inside the analysis window.
        // NOTE(review): assumes the analysis window is at least as
        // long as the synthesis window, otherwise the offset is
        // negative - confirm against the window length helpers.
        const int analysisSize = analysisWindow.getSize();
        const int synthesisSize = synthesisWindow.getSize();
        const int centreOffset = (analysisSize - synthesisSize) / 2;

        int n = 0;
        while (n < synthesisSize) {
            windowScaleFactor += analysisWindow.getValue(n + centreOffset) *
                synthesisWindow.getValue(n);
            ++n;
        }
    }

    WindowType analysisWindowShape();
    int analysisWindowLength();
    WindowType synthesisWindowShape();
    int synthesisWindowLength();
};
// Data members. Members are initialised in declaration order. m_log
// comes first and validateSampleRate() logs through it, so keep m_log
// ahead of m_parameters if the constructor initialises m_parameters via
// validateSampleRate() (confirm against the constructor definition).
Log m_log;
Parameters m_parameters;
const Limits m_limits;
// Atomic so that single reads and writes of these values are
// indivisible.
std::atomic<double> m_pitchScale;
std::atomic<double> m_formantScale;
std::vector<std::shared_ptr<ChannelData>> m_channelData;
// NOTE(review): presumably keyed by FFT size, like ChannelData::scales -
// confirm where the map is populated.
std::map<int, std::shared_ptr<ScaleData>> m_scaleData;
Guide m_guide;
Guide::Configuration m_guideConfiguration;
ChannelAssembly m_channelAssembly;
std::unique_ptr<Resampler> m_inResampler;
std::unique_ptr<Resampler> m_outResampler;
int m_resamplerDelay;
// Selects the readahead-aware size in getWindowSourceSize().
bool m_useReadahead;
int m_prevInhop;
int m_prevOuthop;
bool m_contractThenExpand; // otherwise expand then contract
bool m_firstProcess;
uint32_t m_unityCount;
// Internal processing steps. These are declarations only; the
// definitions are not part of this header section, so behaviour should
// be checked against the implementation file.
void initialise();
void readIn(const float *const *input);
void generate(int required);
int readOut(float *const *output, int outcount);
void createResamplers();
void measureResamplerDelay();
// Per-channel stages; each takes the channel index as its first argument.
void analyseChannel(int channel, int inhop, int prevInhop, int prevOuthop);
void analyseFormant(int channel);
void adjustFormant(int channel);
void adjustPreKick(int channel);
void synthesiseChannel(int channel, int outhop, bool draining);
// Bin ranges consumed by convertToPolar(). Bins in
// [polarFromBin, polarFromBin + polarBinCount) get both magnitude and
// phase. Bins of [magFromBin, magFromBin + magBinCount) that fall
// outside that polar range get magnitude only.
struct ToPolarSpec {
int magFromBin;
int magBinCount;
int polarFromBin;
int polarBinCount;
};
// Return a copy of params whose sample rate is clamped to the range
// 8000..192000. When clamping occurs, two messages are logged at
// level 0: a warning carrying the requested rate, then the limit that
// was applied. A rate inside the range is returned unchanged.
Parameters validateSampleRate(const Parameters &params) {
    const double lowestRate = 8000.0;
    const double highestRate = 192000.0;

    const bool tooLow = params.sampleRate < lowestRate;
    const bool tooHigh = !tooLow && params.sampleRate > highestRate;

    Parameters validated { params };

    if (tooLow || tooHigh) {
        m_log.log(0, "R3LiveShifter: WARNING: Unsupported sample rate", params.sampleRate);
        if (tooLow) {
            m_log.log(0, "R3LiveShifter: Minimum rate is", lowestRate);
            validated.sampleRate = lowestRate;
        } else {
            m_log.log(0, "R3LiveShifter: Maximum rate is", highestRate);
            validated.sampleRate = highestRate;
        }
    }

    return validated;
}
// Convert real/imag data to magnitude and phase over the bin ranges
// described by s.
//
// Bins in the polar range get both magnitude and phase. Any part of
// the magnitude range lying below or above the polar range gets
// magnitude only; phase is not written for those bins.
void convertToPolar(process_t *mag, process_t *phase,
                    const process_t *real, const process_t *imag,
                    const ToPolarSpec &s) const {
    const int polarBegin = s.polarFromBin;
    const int polarEnd = s.polarFromBin + s.polarBinCount;
    const int magBegin = s.magFromBin;
    const int magEnd = s.magFromBin + s.magBinCount;

    // Full conversion over the polar range
    v_cartesian_to_polar(mag + polarBegin,
                         phase + polarBegin,
                         real + polarBegin,
                         imag + polarBegin,
                         s.polarBinCount);

    // Magnitudes only, below the polar range
    if (magBegin < polarBegin) {
        v_cartesian_to_magnitudes(mag + magBegin,
                                  real + magBegin,
                                  imag + magBegin,
                                  polarBegin - magBegin);
    }

    // Magnitudes only, above the polar range
    if (magEnd > polarEnd) {
        v_cartesian_to_magnitudes(mag + polarEnd,
                                  real + polarEnd,
                                  imag + polarEnd,
                                  magEnd - polarEnd);
    }
}
// True only when there are exactly two channels and the
// OptionChannelsTogether flag is set in the options.
bool useMidSide() const {
    if (m_parameters.channels != 2) {
        return false;
    }
    if (m_parameters.options &
        RubberBandLiveShifter::OptionChannelsTogether) {
        return true;
    }
    return false;
}
// True unless the OptionWindowLong flag is set in the options.
bool isSingleWindowed() const {
    if (m_parameters.options &
        RubberBandLiveShifter::OptionWindowLong) {
        return false;
    }
    return true;
}
// Size to use for the window source array. Without readahead this is
// the longest FFT size. With readahead it is the larger of the longest
// FFT size and (classification FFT size + maxInhopWithReadahead).
int getWindowSourceSize() const {
    if (!m_useReadahead) {
        return m_guideConfiguration.longestFftSize;
    }

    const int withReadahead = m_guideConfiguration.classificationFftSize +
        m_limits.maxInhopWithReadahead;

    return (m_guideConfiguration.longestFftSize > withReadahead)
        ? m_guideConfiguration.longestFftSize
        : withReadahead;
}
};
}
#endif