Provide option to shift formant independently of pitch

This commit is contained in:
Chris Cannam
2022-06-13 10:39:13 +01:00
parent 182e2b0e3b
commit c7e4d9eb07
4 changed files with 70 additions and 1 deletions

View File

@@ -421,6 +421,34 @@ public:
*/ */
void setPitchScale(double scale); void setPitchScale(double scale);
/**
* Set a pitch scale for the vocal formant envelope separately
* from the overall pitch scale. This is a ratio of target
* frequency to source frequency. For example, a ratio of 2.0
* would shift the formant envelope up by one octave, 0.5 would
* shift it down by one octave, and 1.0 would leave it unaffected.
*
* By default this is set to the special value of 0.0, which
* causes the scale to be calculated automatically. It will be
* treated as 1.0 / the pitch scale if OptionFormantPreserved is
* specified, or 1.0 for OptionFormantShifted.
*
* Conversely, if this is set to a value other than the default
* 0.0, formant shifting will happen regardless of the state of
* the OptionFormantPreserved/OptionFormantShifted option.
*
* This function is provided for special effects only. You do not
* need to call it for ordinary pitch shifting, with or without
* formant preservation - just specify or omit the
* OptionFormantPreserved option as appropriate. Use this function
* only if you want to shift formants by a distance other than
* that of the overall pitch shift.
*
* This function is supported only in the R3 (OptionEngineFiner)
* engine. It has no effect in R2 (OptionEngineFaster).
*/
void setFormantScale(double scale);
/** /**
* Return the last time ratio value that was set (either on * Return the last time ratio value that was set (either on
* construction or with setTimeRatio()). * construction or with setTimeRatio()).
@@ -433,6 +461,16 @@ public:
*/ */
double getPitchScale() const; double getPitchScale() const;
/**
* Return the last formant scaling ratio that was set with
* setFormantScale, or 0.0 if the default automatic scaling is in
* effect.
*
* This function is supported only in the R3 (OptionEngineFiner)
* engine. It always returns 0.0 in R2 (OptionEngineFaster).
*/
double getFormantScale() const;
/** /**
* Return the output delay or latency of the stretcher. This is * Return the output delay or latency of the stretcher. This is
* the number of audio samples that one would have to discard at * the number of audio samples that one would have to discard at

View File

@@ -75,6 +75,12 @@ RubberBandStretcher::setPitchScale(double scale)
else m_r3d->setPitchScale(scale); else m_r3d->setPitchScale(scale);
} }
void
RubberBandStretcher::setFormantScale(double scale)
{
    // The formant scale is forwarded only when the m_r3d
    // implementation exists; otherwise the request is dropped
    // without any effect.
    if (!m_r3d) {
        return;
    }
    m_r3d->setFormantScale(scale);
}
double double
RubberBandStretcher::getTimeRatio() const RubberBandStretcher::getTimeRatio() const
{ {
@@ -89,6 +95,13 @@ RubberBandStretcher::getPitchScale() const
else return m_r3d->getPitchScale(); else return m_r3d->getPitchScale();
} }
double
RubberBandStretcher::getFormantScale() const
{
    // When the m_d implementation is active there is no formant
    // scale to report, so 0.0 is returned; otherwise the value is
    // queried from m_r3d.
    return m_d ? 0.0 : m_r3d->getFormantScale();
}
size_t size_t
RubberBandStretcher::getLatency() const RubberBandStretcher::getLatency() const
{ {

View File

@@ -35,6 +35,7 @@ R3StretcherImpl::R3StretcherImpl(Parameters parameters,
m_parameters(parameters), m_parameters(parameters),
m_timeRatio(initialTimeRatio), m_timeRatio(initialTimeRatio),
m_pitchScale(initialPitchScale), m_pitchScale(initialPitchScale),
m_formantScale(0.0),
m_guide(Guide::Parameters(m_parameters.sampleRate, parameters.logger)), m_guide(Guide::Parameters(m_parameters.sampleRate, parameters.logger)),
m_guideConfiguration(m_guide.getConfiguration()), m_guideConfiguration(m_guide.getConfiguration()),
m_channelAssembly(m_parameters.channels), m_channelAssembly(m_parameters.channels),
@@ -173,6 +174,12 @@ R3StretcherImpl::setPitchScale(double scale)
calculateHop(); calculateHop();
} }
void
R3StretcherImpl::setFormantScale(double scale)
{
    // Record the requested formant scale as given. No validation or
    // recalculation happens here; the stored value is only read back
    // later.
    m_formantScale.store(scale);
}
void void
R3StretcherImpl::setFormantOption(RubberBandStretcher::Options options) R3StretcherImpl::setFormantOption(RubberBandStretcher::Options options)
{ {
@@ -233,6 +240,12 @@ R3StretcherImpl::getPitchScale() const
return m_pitchScale; return m_pitchScale;
} }
double
R3StretcherImpl::getFormantScale() const
{
    // Report the most recently stored formant scale (0.0 unless
    // setFormantScale has stored something else).
    const double currentScale = m_formantScale.load();
    return currentScale;
}
size_t size_t
R3StretcherImpl::getLatency() const R3StretcherImpl::getLatency() const
{ {
@@ -777,7 +790,9 @@ R3StretcherImpl::adjustFormant(int c)
int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate)); int highBin = int(floor(fftSize * 10000.0 / m_parameters.sampleRate));
double targetFactor = double(cd->formant->fftSize) / double(fftSize); double targetFactor = double(cd->formant->fftSize) / double(fftSize);
double sourceFactor = targetFactor * m_pitchScale; double formantScale = m_formantScale;
if (formantScale == 0.0) formantScale = 1.0 / m_pitchScale;
double sourceFactor = targetFactor / formantScale;
double maxRatio = 60.0; double maxRatio = 60.0;
double minRatio = 1.0 / maxRatio; double minRatio = 1.0 / maxRatio;

View File

@@ -69,9 +69,11 @@ public:
void setTimeRatio(double ratio); void setTimeRatio(double ratio);
void setPitchScale(double scale); void setPitchScale(double scale);
void setFormantScale(double scale);
double getTimeRatio() const; double getTimeRatio() const;
double getPitchScale() const; double getPitchScale() const;
double getFormantScale() const;
void setFormantOption(RubberBandStretcher::Options); void setFormantOption(RubberBandStretcher::Options);
void setPitchOption(RubberBandStretcher::Options); void setPitchOption(RubberBandStretcher::Options);
@@ -265,6 +267,7 @@ protected:
std::atomic<double> m_timeRatio; std::atomic<double> m_timeRatio;
std::atomic<double> m_pitchScale; std::atomic<double> m_pitchScale;
std::atomic<double> m_formantScale;
std::vector<std::shared_ptr<ChannelData>> m_channelData; std::vector<std::shared_ptr<ChannelData>> m_channelData;
std::map<int, std::shared_ptr<ScaleData>> m_scaleData; std::map<int, std::shared_ptr<ScaleData>> m_scaleData;