Update the OptionPitch* options so that they do the expected thing for the R3 engine as well - this speeds up realtime mode when not dynamically pitch-shifting

This commit is contained in:
Chris Cannam
2022-07-06 10:22:50 +01:00
parent 6ecd103dd0
commit 6e941fa2d3
7 changed files with 75 additions and 40 deletions

View File

@@ -30,3 +30,4 @@ out-*/
playlist-out/*
formant-out-*/
out*.wav
packages/

View File

@@ -12,6 +12,9 @@ Changes in Rubber Band v3.0.0
stream and/or handled in a realtime-safe way
* Add option to shift formant independently of pitch (R3 engine
only)
* Deprecate the rather ambiguous function getLatency(), effectively
replacing it with two new functions getPreferredStartPad() and
getStartDelay(). See their documentation for more details
The library is both binary and API compatible all the way back to
version 1.2 for forward compatibility, but not backward compatibility,

View File

@@ -312,24 +312,24 @@ int main(int argc, char **argv)
}
if (fullHelp) {
cerr << "The remaining options fine-tune the processing mode and stretch algorithm." << endl;
cerr << "These are mostly included for test purposes; the default settings and standard" << endl;
cerr << "crispness parameter are intended to provide the best sounding set of options" << endl;
cerr << "for most situations. The default is to use none of these options." << endl;
cerr << "The default is to use none of these options." << endl;
cerr << "The options marked (2) currently only have an effect when using the R2 engine" << endl;
cerr << "(see -2, -3 options above)." << endl;
cerr << endl;
cerr << " -R, --realtime Select realtime mode (implies --no-threads)." << endl;
cerr << " This utility does not do realtime stream processing;" << endl;
cerr << " the option merely selects realtime mode for the" << endl;
cerr << " stretcher it uses" << endl;
cerr << " --no-threads No extra threads regardless of CPU and channel count" << endl;
cerr << " --threads Assume multi-CPU even if only one CPU is identified" << endl;
cerr << " --no-transients Disable phase resynchronisation at transients" << endl;
cerr << " --bl-transients Band-limit phase resync to extreme frequencies" << endl;
cerr << " --no-lamination Disable phase lamination" << endl;
cerr << " --window-long Use longer processing window (actual size may vary)" << endl;
cerr << " --window-short Use shorter processing window" << endl;
cerr << " --smoothing Apply window presum and time-domain smoothing" << endl;
cerr << " --detector-perc Use percussive transient detector (as in pre-1.5)" << endl;
cerr << " --detector-soft Use soft transient detector" << endl;
cerr << "(2) --no-threads No extra threads regardless of CPU and channel count" << endl;
cerr << "(2) --threads Assume multi-CPU even if only one CPU is identified" << endl;
cerr << "(2) --no-transients Disable phase resynchronisation at transients" << endl;
cerr << "(2) --bl-transients Band-limit phase resync to extreme frequencies" << endl;
cerr << "(2) --no-lamination Disable phase lamination" << endl;
cerr << "(2) --window-long Use longer processing window (actual size may vary)" << endl;
cerr << "(2) --window-short Use shorter processing window" << endl;
cerr << "(2) --smoothing Apply window presum and time-domain smoothing" << endl;
cerr << "(2) --detector-perc Use percussive transient detector (as in pre-1.5)" << endl;
cerr << "(2) --detector-soft Use soft transient detector" << endl;
cerr << " --pitch-hq In RT mode, use a slower, higher quality pitch shift" << endl;
cerr << " --centre-focus Preserve focus of centre material in stereo" << endl;
cerr << " (at a cost in width and individual channel quality)" << endl;
@@ -350,7 +350,7 @@ int main(int argc, char **argv)
cerr << " -H, --full-help Show the full help output" << endl;
cerr << endl;
if (fullHelp) {
cerr << "\"Crispness\" levels:" << endl;
cerr << "\"Crispness\" levels: (2)" << endl;
cerr << " -c 0 equivalent to --no-transients --no-lamination --window-long" << endl;
cerr << " -c 1 equivalent to --detector-soft --no-lamination --window-long (for piano)" << endl;
cerr << " -c 2 equivalent to --no-transients --no-lamination" << endl;

View File

@@ -310,26 +310,29 @@ public:
* perceived pitch profile of the voice or instrument.
*
* 10. Flags prefixed \c OptionPitch control the method used for
* pitch shifting in the R2 engine. These options have no effect
* when using the R3 engine. These options may be changed at any
* time. They are only effective in realtime mode; in offline
* mode, the pitch-shift method is fixed.
* pitch shifting. In the R2 engine they may be changed at any
* time but affect only realtime mode (in offline mode the method
* cannot be changed). In the R3 engine they affect both realtime
* and offline modes but are fixed on construction.
*
* \li \c OptionPitchHighSpeed - Use a method with a CPU cost
* that is relatively moderate and predictable. This may
* sound less clear than OptionPitchHighQuality, especially
* for large pitch shifts. This is the default.
* \li \c OptionPitchHighSpeed - Favour CPU cost over sound
* quality. This is the default. Use this when time-stretching
* only, or for fixed pitch shifts where CPU usage is of
* concern. Do not use this for arbitrarily time-varying pitch
* shifts (see OptionPitchHighConsistency below).
*
* \li \c OptionPitchHighQuality - Favour sound quality over CPU
* cost. Use this for fixed pitch shifts where sound quality is
* of most concern. Do not use this for arbitrarily time-varying
* pitch shifts (see OptionPitchHighConsistency below).
* \li \c OptionPitchHighQuality - Use the highest quality
* method for pitch shifting. This method has a CPU cost
* approximately proportional to the required frequency shift.
* \li \c OptionPitchHighConsistency - Use the method that gives
* greatest consistency when used to create small variations in
* pitch around the 1.0-ratio level. Unlike the previous two
* options, this avoids discontinuities when moving across the
* 1.0 pitch scale in real-time; it also consumes more CPU than
* the others in the case where the pitch scale is exactly 1.0.
* \li \c OptionPitchHighConsistency - Use a method that
* supports dynamic pitch changes without discontinuities,
* including when crossing the 1.0 pitch scale. This may cost
* more in CPU than the other two options, especially when the
* pitch scale is exactly 1.0. You should use this option
* whenever you wish to support dynamically changing pitch
* shifts during processing.
*
* 11. Flags prefixed \c OptionChannels control the method used
* for processing two-channel stereo audio. These have different,

View File

@@ -209,6 +209,7 @@ public:
setPitchOption(Options options)
{
// Forward the request to whichever engine implementation is present.
// m_r2 is checked first; m_r3 is only consulted when m_r2 is null.
// If neither is set, the call does nothing.
if (m_r2) m_r2->setPitchOption(options);
else if (m_r3) m_r3->setPitchOption(options);
}
void

View File

@@ -208,6 +208,12 @@ R3Stretcher::setFormantOption(RubberBandStretcher::Options options)
m_parameters.options |= options;
}
// Changing the pitch options after construction is not supported by the
// R3 engine. The options argument is unnamed and unused: this function
// changes no state and only logs a message reporting that the request
// was not applied.
void
R3Stretcher::setPitchOption(RubberBandStretcher::Options)
{
// NOTE(review): the first argument to log() is presumably the log
// level - confirm against the logger's definition.
m_log.log(0, "R3Stretcher::setPitchOption: Option change after construction is not supported in R3 engine");
}
void
R3Stretcher::setKeyFrameMap(const std::map<size_t, size_t> &mapping)
{
@@ -227,15 +233,26 @@ void
R3Stretcher::createResampler()
{
Resampler::Parameters resamplerParameters;
if (m_parameters.options & RubberBandStretcher::OptionPitchHighQuality) {
resamplerParameters.quality = Resampler::Best;
} else {
resamplerParameters.quality = Resampler::FastestTolerable;
}
resamplerParameters.initialSampleRate = m_parameters.sampleRate;
resamplerParameters.maxBufferSize = m_guideConfiguration.longestFftSize;
if (isRealTime()) {
if (m_parameters.options &
RubberBandStretcher::OptionPitchHighConsistency) {
resamplerParameters.dynamism = Resampler::RatioOftenChanging;
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
} else {
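// realtime without OptionPitchHighConsistency: treat the ratio as
// mostly fixed, but still smooth any change that does occur (in
// offline mode, handled below, the ratio can't be changed at all)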
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
resamplerParameters.ratioChange = Resampler::SmoothRatioChange;
}
} else {
resamplerParameters.dynamism = Resampler::RatioMostlyFixed;
resamplerParameters.ratioChange = Resampler::SuddenRatioChange;
}
@@ -693,8 +710,17 @@ R3Stretcher::consume()
// Resample
int resampledCount = 0;
bool resampling = false;
if (m_resampler) {
if (m_pitchScale != 1.0 ||
(m_parameters.options &
RubberBandStretcher::OptionPitchHighConsistency)) {
resampling = true;
}
}
int resampledCount = 0;
if (resampling) {
for (int c = 0; c < channels; ++c) {
auto &cd = m_channelData.at(c);
m_channelAssembly.mixdown[c] = cd->mixdown.data();
@@ -712,7 +738,7 @@ R3Stretcher::consume()
// Emit
int writeCount = validCount;
if (m_resampler) {
if (resampling) {
writeCount = resampledCount;
}
@@ -729,7 +755,7 @@ R3Stretcher::consume()
for (int c = 0; c < channels; ++c) {
auto &cd = m_channelData.at(c);
if (m_resampler) {
if (resampling) {
cd->outbuf->write(cd->resampled.data(), writeCount);
} else {
cd->outbuf->write(cd->mixdown.data(), writeCount);

View File

@@ -77,6 +77,7 @@ public:
void setKeyFrameMap(const std::map<size_t, size_t> &);
void setFormantOption(RubberBandStretcher::Options);
void setPitchOption(RubberBandStretcher::Options);
void study(const float *const *input, size_t samples, bool final);
size_t getSamplesRequired() const;