From 199268815552a628012f0f8468e310086225e5e2 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 15 Sep 2020 11:43:57 +0100 Subject: [PATCH] Detect out-of-range samples on output and restart with lower gain --- main/main.cpp | 384 +++++++++++++++++++++++++++----------------------- 1 file changed, 211 insertions(+), 173 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index 53c865c..6404fbe 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -121,6 +121,8 @@ int main(int argc, char **argv) SoftDetector } detector = CompoundDetector; + bool ignoreClipping = false; + while (1) { int optionIndex = 0; @@ -155,6 +157,7 @@ int main(int argc, char **argv) { "timemap", 1, 0, 'M' }, { "freqmap", 1, 0, 'Q' }, { "pitchmap", 1, 0, 'C' }, + { "ignore-clipping", 0, 0, 'i' }, { 0, 0, 0, 0 } }; @@ -175,7 +178,7 @@ int main(int argc, char **argv) case 'R': realtime = true; break; case 'L': precise = false; break; case 'P': precise = true; break; - case 'F': formant = true; break; + case 'F': formant = true; break; case '0': threading = 1; break; case '@': threading = 2; break; case '1': transients = NoTransients; crispchanged = true; break; @@ -193,6 +196,7 @@ int main(int argc, char **argv) case 'M': timeMapFile = optarg; break; case 'Q': freqMapFile = optarg; freqOrPitchMapSpecified = true; break; case 'C': pitchMapFile = optarg; freqOrPitchMapSpecified = true; break; + case 'i': ignoreClipping = true; break; default: help = true; break; } } @@ -213,11 +217,11 @@ int main(int argc, char **argv) if (help || !haveRatio || optind + 2 != argc) { cerr << endl; - cerr << "Rubber Band" << endl; + cerr << "Rubber Band" << endl; cerr << "An audio time-stretching and pitch-shifting library and utility program." << endl; - cerr << "Copyright 2007-2020 Particular Programs Ltd." << endl; + cerr << "Copyright 2007-2020 Particular Programs Ltd." << endl; cerr << endl; - cerr << " Usage: " << argv[0] << " [options] " << endl; + cerr << " Usage: " << argv[0] << " [options] " << endl; cerr << endl; cerr << "You must specify at least one of the following time and pitch ratio options." << endl; cerr << endl; @@ -260,7 +264,7 @@ int main(int argc, char **argv) cerr << "for more details." << endl; cerr << endl; cerr << " -c, --crisp Crispness (N = 0,1,2,3,4,5,6); default 5 (see below)" << endl; - cerr << " -F, --formant Enable formant preservation when pitch shifting" << endl; + cerr << " -F, --formant Enable formant preservation when pitch shifting" << endl; cerr << endl; cerr << "The remaining options fine-tune the processing mode and stretch algorithm." << endl; cerr << "These are mostly included for test purposes; the default settings and standard" << endl; @@ -283,6 +287,8 @@ int main(int argc, char **argv) cerr << " --pitch-hq In RT mode, use a slower, higher quality pitch shift" << endl; cerr << " --centre-focus Preserve focus of centre material in stereo" << endl; cerr << " (at a cost in width and individual channel quality)" << endl; + cerr << " --ignore-clipping Ignore clipping at output; the default is to restart" << endl; + cerr << " with reduced gain if clipping occurs" << endl; cerr << endl; cerr << " -d, --debug Select debug level (N = 0,1,2,3); default 0, full 3" << endl; cerr << " (N.B. debug level 3 includes audible ticks in output)" << endl; @@ -300,7 +306,7 @@ int main(int argc, char **argv) cerr << " -c 5 default processing options" << endl; cerr << " -c 6 equivalent to --no-lamination --window-short (may be good for drums)" << endl; cerr << endl; - return 2; + return 2; } if (ratio <= 0.0) { @@ -447,9 +453,9 @@ int main(int argc, char **argv) sndfile = sf_open(fileName, SFM_READ, &sfinfo); if (!sndfile) { - cerr << "ERROR: Failed to open input file \"" << fileName << "\": " - << sf_strerror(sndfile) << endl; - return 1; + cerr << "ERROR: Failed to open input file \"" << fileName << "\": " + << sf_strerror(sndfile) << endl; + return 1; } if (sfinfo.samplerate == 0) { @@ -475,9 +481,9 @@ int main(int argc, char **argv) sndfileOut = sf_open(fileNameOut, SFM_WRITE, &sfinfoOut) ; if (!sndfileOut) { - cerr << "ERROR: Failed to open output file \"" << fileNameOut << "\" for writing: " - << sf_strerror(sndfileOut) << endl; - return 1; + cerr << "ERROR: Failed to open output file \"" << fileNameOut << "\" for writing: " + << sf_strerror(sndfileOut) << endl; + return 1; } int ibs = 1024; @@ -551,35 +557,118 @@ int main(int argc, char **argv) #endif timeval tv; (void)gettimeofday(&tv, 0); - + RubberBandStretcher::setDefaultDebugLevel(debug); - RubberBandStretcher ts(sfinfo.samplerate, channels, options, - ratio, frequencyshift); + size_t countIn = 0, countOut = 0; - ts.setExpectedInputDuration(sfinfo.frames); + float gain = 1.f; + bool successful = false; - float *fbuf = new float[channels * ibs]; - float **ibuf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) ibuf[i] = new float[ibs]; + while (!successful) { // we may have to repeat with a modified + // gain, if clipping occurs + successful = true; - int frame = 0; - int percent = 0; + RubberBandStretcher ts(sfinfo.samplerate, channels, options, + ratio, frequencyshift); + ts.setExpectedInputDuration(sfinfo.frames); - sf_seek(sndfile, 0, SEEK_SET); - - if (!realtime) { - - if (!quiet) { - cerr << "Pass 1: Studying..." << endl; + float *fbuf = new float[channels * ibs]; + float **ibuf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) { + ibuf[i] = new float[ibs]; } + int frame = 0; + int percent = 0; + + sf_seek(sndfile, 0, SEEK_SET); + + if (!realtime) { + + if (!quiet) { + cerr << "Pass 1: Studying..." << endl; + } + + while (frame < sfinfo.frames) { + + int count = -1; + + if ((count = sf_readf_float(sndfile, fbuf, ibs)) <= 0) break; + + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < count; ++i) { + float value = fbuf[i * channels + c]; + ibuf[c][i] = value; + } + } + + bool final = (frame + ibs >= sfinfo.frames); + + ts.study(ibuf, count, final); + + int p = int((double(frame) * 100.0) / sfinfo.frames); + if (p > percent || frame == 0) { + percent = p; + if (!quiet) { + cerr << "\r" << percent << "% "; + } + } + + frame += ibs; + } + + if (!quiet) { + cerr << "\rCalculating profile..." << endl; + } + + sf_seek(sndfile, 0, SEEK_SET); + } + + frame = 0; + percent = 0; + + if (!timeMap.empty()) { + ts.setKeyFrameMap(timeMap); + } + + std::map::const_iterator freqMapItr = freqMap.begin(); + + countIn = 0; + countOut = 0; + bool clipping = false; + while (frame < sfinfo.frames) { int count = -1; + int thisBlockSize = ibs; - if ((count = sf_readf_float(sndfile, fbuf, ibs)) <= 0) break; + while (freqMapItr != freqMap.end()) { + size_t nextFreqFrame = freqMapItr->first + ts.getLatency(); + if (nextFreqFrame <= countIn) { + double s = frequencyshift * freqMapItr->second; + if (debug > 0) { + cerr << "at frame " << countIn + << " (requested at " << freqMapItr->first + << " plus latency " << ts.getLatency() + << ") updating frequency ratio to " << s << endl; + } + ts.setPitchScale(s); + ++freqMapItr; + } else { + if (nextFreqFrame < countIn + thisBlockSize) { + thisBlockSize = nextFreqFrame - countIn; + } + break; + } + } + if ((count = sf_readf_float(sndfile, fbuf, thisBlockSize)) < 0) { + break; + } + + countIn += count; + for (size_t c = 0; c < channels; ++c) { for (int i = 0; i < count; ++i) { float value = fbuf[i * channels + c]; @@ -587,9 +676,62 @@ int main(int argc, char **argv) } } - bool final = (frame + ibs >= sfinfo.frames); + bool final = (frame + thisBlockSize >= sfinfo.frames); - ts.study(ibuf, count, final); + if (debug > 2) { + cerr << "count = " << count << ", ibs = " << thisBlockSize << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl; + } + + ts.process(ibuf, count, final); + + int avail = ts.available(); + if (debug > 1) cerr << "available = " << avail << endl; + + if (avail > 0) { + float **obf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) { + obf[i] = new float[avail]; + } + ts.retrieve(obf, avail); + countOut += avail; + float *fobf = new float[channels * avail]; + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < avail; ++i) { + float value = gain * obf[c][i]; + if (ignoreClipping) { // i.e. just clamp, don't bail out + if (value > 1.f) value = 1.f; + if (value < -1.f) value = -1.f; + } else { + if (value >= 1.f || value < -1.f) { + clipping = true; + gain = (0.999f / fabsf(obf[c][i])); + } + } + fobf[i * channels + c] = value; + } + } + sf_writef_float(sndfileOut, fobf, avail); + delete[] fobf; + for (size_t i = 0; i < channels; ++i) { + delete[] obf[i]; + } + delete[] obf; + } + + if (clipping) { + if (!quiet) { + cerr << "NOTE: Clipping detected at output sample " + << countOut << ", restarting with " + << "reduced gain of " << gain + << " (supply --ignore-clipping to avoid this)" << endl; + } + successful = false; + break; + } + + if (frame == 0 && !realtime && !quiet) { + cerr << "Pass 2: Processing..." << endl; + } int p = int((double(frame) * 100.0) / sfinfo.frames); if (p > percent || frame == 0) { @@ -599,159 +741,55 @@ int main(int argc, char **argv) } } - frame += ibs; + frame += thisBlockSize; } - if (!quiet) { - cerr << "\rCalculating profile..." << endl; + if (!successful) { + sf_seek(sndfile, 0, SEEK_SET); + sf_seek(sndfileOut, 0, SEEK_SET); + continue; } - - sf_seek(sndfile, 0, SEEK_SET); - } - - frame = 0; - percent = 0; - - if (!timeMap.empty()) { - ts.setKeyFrameMap(timeMap); - } - - std::map::const_iterator freqMapItr = freqMap.begin(); - size_t countIn = 0, countOut = 0; + if (!quiet) { + cerr << "\r " << endl; + } + int avail; - while (frame < sfinfo.frames) { + while ((avail = ts.available()) >= 0) { - int count = -1; - int thisBlockSize = ibs; - - while (freqMapItr != freqMap.end()) { - size_t nextFreqFrame = freqMapItr->first + ts.getLatency(); - if (nextFreqFrame <= countIn) { - double s = frequencyshift * freqMapItr->second; - if (debug > 0) { - cerr << "at frame " << countIn - << " (requested at " << freqMapItr->first - << " plus latency " << ts.getLatency() - << ") updating frequency ratio to " << s << endl; + if (debug > 1) { + cerr << "(completing) available = " << avail << endl; + } + + if (avail > 0) { + float **obf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) { + obf[i] = new float[avail]; } - ts.setPitchScale(s); - ++freqMapItr; + ts.retrieve(obf, avail); + countOut += avail; + float *fobf = new float[channels * avail]; + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < avail; ++i) { + float value = gain * obf[c][i]; + if (value > 1.f) value = 1.f; + if (value < -1.f) value = -1.f; + fobf[i * channels + c] = value; + } + } + + sf_writef_float(sndfileOut, fobf, avail); + delete[] fobf; + for (size_t i = 0; i < channels; ++i) { + delete[] obf[i]; + } + delete[] obf; } else { - if (nextFreqFrame < countIn + thisBlockSize) { - thisBlockSize = nextFreqFrame - countIn; - } - break; + usleep(10000); } } - - if ((count = sf_readf_float(sndfile, fbuf, thisBlockSize)) < 0) break; - - countIn += count; - - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < count; ++i) { - float value = fbuf[i * channels + c]; - ibuf[c][i] = value; - } - } - - bool final = (frame + thisBlockSize >= sfinfo.frames); - - if (debug > 2) { - cerr << "count = " << count << ", ibs = " << thisBlockSize << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl; - } - - ts.process(ibuf, count, final); - - int avail = ts.available(); - if (debug > 1) cerr << "available = " << avail << endl; - - if (avail > 0) { - float **obf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) { - obf[i] = new float[avail]; - } - ts.retrieve(obf, avail); - countOut += avail; - float *fobf = new float[channels * avail]; - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < avail; ++i) { - float value = obf[c][i]; - if (value > 1.f) value = 1.f; - if (value < -1.f) value = -1.f; - fobf[i * channels + c] = value; - } - } -// cout << "fobf mean: "; -// double d = 0; -// for (int i = 0; i < avail; ++i) { -// d += fobf[i]; -// } -// d /= avail; -// cout << d << endl; - sf_writef_float(sndfileOut, fobf, avail); - delete[] fobf; - for (size_t i = 0; i < channels; ++i) { - delete[] obf[i]; - } - delete[] obf; - } - - if (frame == 0 && !realtime && !quiet) { - cerr << "Pass 2: Processing..." << endl; - } - - int p = int((double(frame) * 100.0) / sfinfo.frames); - if (p > percent || frame == 0) { - percent = p; - if (!quiet) { - cerr << "\r" << percent << "% "; - } - } - - frame += thisBlockSize; - } - - if (!quiet) { - cerr << "\r " << endl; - } - int avail; - - while ((avail = ts.available()) >= 0) { - - if (debug > 1) { - cerr << "(completing) available = " << avail << endl; - } - - if (avail > 0) { - float **obf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) { - obf[i] = new float[avail]; - } - ts.retrieve(obf, avail); - countOut += avail; - float *fobf = new float[channels * avail]; - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < avail; ++i) { - float value = obf[c][i]; - if (value > 1.f) value = 1.f; - if (value < -1.f) value = -1.f; - fobf[i * channels + c] = value; - } - } - - sf_writef_float(sndfileOut, fobf, avail); - delete[] fobf; - for (size_t i = 0; i < channels; ++i) { - delete[] obf[i]; - } - delete[] obf; - } else { - usleep(10000); - } } - + sf_close(sndfile); sf_close(sndfileOut); @@ -762,7 +800,7 @@ int main(int argc, char **argv) #ifdef _WIN32 RubberBand:: #endif - timeval etv; + timeval etv; (void)gettimeofday(&etv, 0); etv.tv_sec -= tv.tv_sec;