From fa5ad0384f850520cc174178e9be4291094b94c7 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 10 Sep 2020 15:31:27 +0100 Subject: [PATCH 01/23] Add docs (only) for freq/pitch map --- main/main.cpp | 105 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 86 insertions(+), 19 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index f6b259c..d9a59fc 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -104,7 +104,9 @@ int main(int argc, char **argv) bool haveRatio = false; - std::string mapfile; + std::string timeMapFile; + std::string freqMapFile; + std::string pitchMapFile; enum { NoTransients, @@ -150,6 +152,8 @@ int main(int argc, char **argv) { "threads", 0, 0, '@' }, { "quiet", 0, 0, 'q' }, { "timemap", 1, 0, 'M' }, + { "freqmap", 1, 0, 'Q' }, + { "pitchmap", 1, 0, 'C' }, { 0, 0, 0, 0 } }; @@ -185,7 +189,9 @@ int main(int argc, char **argv) case '%': hqpitch = true; break; case 'c': crispness = atoi(optarg); break; case 'q': quiet = true; break; - case 'M': mapfile = optarg; break; + case 'M': timeMapFile = optarg; break; + case 'Q': freqMapFile = optarg; break; + case 'C': pitchMapFile = optarg; break; default: help = true; break; } } @@ -213,12 +219,31 @@ int main(int argc, char **argv) cerr << " -p, --pitch Raise pitch by X semitones, or" << endl; cerr << " -f, --frequency Change frequency by multiple X" << endl; cerr << endl; - cerr << " -M, --timemap Use file F as the source for key frame map" << endl; + cerr << "The following options provide ways of making the time and frequency ratios" << endl; + cerr << "change during the audio." << endl; cerr << endl; - cerr << "A map file consists of a series of lines each having two numbers separated" << endl; - cerr << "by a single space. These are source and target sample frame numbers for fixed" << endl; - cerr << "time points within the audio data, defining a varying stretch factor through" << endl; - cerr << "the audio. You must specify an overall stretch factor using e.g. -t as well." 
<< endl; + cerr << " -M, --timemap Use file F as the source for time map" << endl; + cerr << endl; + cerr << " A time map (or key-frame map) file contains a series of lines, each with two" << endl; + cerr << " sample frame numbers separated by a single space. These are source and" << endl; + cerr << " target frames for fixed time points within the audio data, defining a varying" << endl; + cerr << " stretch factor through the audio." << endl; + cerr << " You must specify an overall stretch factor using e.g. -t as well." << endl; + cerr << endl; + cerr << " --pitchmap Use file F as the source for pitch map" << endl; + cerr << endl; + cerr << " A pitch map file contains a series of lines, each with two values: a" << endl; + cerr << " sample frame number and a pitch offset in semitones, separated by a single" << endl; + cerr << " space. These specify a varying pitch factor through the audio. The offsets" << endl; + cerr << " are all relative to the initial offset of zero, not to the previous offset." << endl; + cerr << " This option implies realtime mode (-R)." << endl; + cerr << endl; + cerr << " --freqmap Use file F as the source for frequency map" << endl; + cerr << endl; + cerr << " As --pitchmap, except that the second column in the file contains frequency" << endl; + cerr << " multipliers rather than pitch offsets (the same as the difference between" << endl; + cerr << " pitch and frequency options above)." << endl; + cerr << " This option implies realtime mode (-R)." << endl; cerr << endl; cerr << "The following options provide a simple way to adjust the sound. See below" << endl; cerr << "for more details." 
<< endl; @@ -302,34 +327,35 @@ int main(int argc, char **argv) cerr << ")" << endl; } - std::map mapping; - - if (mapfile != "") { - std::ifstream ifile(mapfile.c_str()); + std::map timeMap; + if (timeMapFile != "") { + std::ifstream ifile(timeMapFile.c_str()); if (!ifile.is_open()) { - cerr << "ERROR: Failed to open time map file \"" << mapfile << "\"" - << endl; + cerr << "ERROR: Failed to open time map file \"" + << timeMapFile << "\"" << endl; return 1; } std::string line; int lineno = 0; while (!ifile.eof()) { std::getline(ifile, line); - while (line.length() > 0 && line[0] == ' ') line = line.substr(1); + while (line.length() > 0 && line[0] == ' ') { + line = line.substr(1); + } if (line == "") { ++lineno; continue; } std::string::size_type i = line.find_first_of(" "); if (i == std::string::npos) { - cerr << "ERROR: Time map file \"" << mapfile + cerr << "ERROR: Time map file \"" << timeMapFile << "\" is malformed at line " << lineno << endl; return 1; } size_t source = atoi(line.substr(0, i).c_str()); while (i < line.length() && line[i] == ' ') ++i; size_t target = atoi(line.substr(i).c_str()); - mapping[source] = target; + timeMap[source] = target; if (debug > 0) { cerr << "adding mapping from " << source << " to " << target << endl; } @@ -338,7 +364,48 @@ int main(int argc, char **argv) ifile.close(); if (!quiet) { - cerr << "Read " << mapping.size() << " line(s) from map file" << endl; + cerr << "Read " << timeMap.size() << " line(s) from time map file" << endl; + } + } + + std::map freqMap; + if (freqMapFile != "") { + std::ifstream ifile(freqMapFile.c_str()); + if (!ifile.is_open()) { + cerr << "ERROR: Failed to open frequency map file \"" + << freqMapFile << "\"" << endl; + return 1; + } + std::string line; + int lineno = 0; + while (!ifile.eof()) { + std::getline(ifile, line); + while (line.length() > 0 && line[0] == ' ') { + line = line.substr(1); + } + if (line == "") { + ++lineno; + continue; + } + std::string::size_type i = line.find_first_of(" 
"); + if (i == std::string::npos) { + cerr << "ERROR: Frequency map file \"" << freqMapFile + << "\" is malformed at line " << lineno << endl; + return 1; + } + size_t source = atoi(line.substr(0, i).c_str()); + while (i < line.length() && line[i] == ' ') ++i; + double freq = atof(line.substr(i).c_str()); + freqMap[source] = freq; + if (debug > 0) { + cerr << "adding mapping for source frame " << source << " of frequency multiplier " << freq << endl; + } + ++lineno; + } + ifile.close(); + + if (!quiet) { + cerr << "Read " << freqMap.size() << " line(s) from frequency map file" << endl; } } @@ -510,8 +577,8 @@ int main(int argc, char **argv) frame = 0; percent = 0; - if (!mapping.empty()) { - ts.setKeyFrameMap(mapping); + if (!timeMap.empty()) { + ts.setKeyFrameMap(timeMap); } size_t countIn = 0, countOut = 0; From 5f897656f5267689cabdf3e0fe69194d6d47b10d Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 10 Sep 2020 16:10:30 +0100 Subject: [PATCH 02/23] First cut at freq/pitch map --- main/main.cpp | 81 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index d9a59fc..d8831b6 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -201,6 +201,15 @@ int main(int argc, char **argv) return 0; } + if (freqMapFile != "" || pitchMapFile != "") { + if (freqMapFile != "" && pitchMapFile != "") { + cerr << "ERROR: Please specify either pitch map or frequency map, not both" << endl; + return 1; + } + haveRatio = true; + realtime = true; + } + if (help || !haveRatio || optind + 2 != argc) { cerr << endl; cerr << "Rubber Band" << endl; @@ -225,27 +234,27 @@ int main(int argc, char **argv) cerr << " -M, --timemap Use file F as the source for time map" << endl; cerr << endl; cerr << " A time map (or key-frame map) file contains a series of lines, each with two" << endl; - cerr << " sample frame numbers separated by a single space. 
These are source and" << endl; + cerr << " sample frame numbers separated by a single space. These are source and" << endl; cerr << " target frames for fixed time points within the audio data, defining a varying" << endl; cerr << " stretch factor through the audio." << endl; - cerr << " You must specify an overall stretch factor using e.g. -t as well." << endl; + cerr << " You must specify an overall stretch factor using -t, -T, or -D as well." << endl; cerr << endl; cerr << " --pitchmap Use file F as the source for pitch map" << endl; cerr << endl; cerr << " A pitch map file contains a series of lines, each with two values: a" << endl; cerr << " sample frame number and a pitch offset in semitones, separated by a single" << endl; cerr << " space. These specify a varying pitch factor through the audio. The offsets" << endl; - cerr << " are all relative to the initial offset of zero, not to the previous offset." << endl; - cerr << " This option implies realtime mode (-R)." << endl; + cerr << " are all relative to an initial offset specified by the pitch or frequency" << endl; + cerr << " option, or relative to no shift if neither was specified. Offsets are" << endl; + cerr << " not cumulative. This option implies realtime mode (-R)." << endl; cerr << endl; cerr << " --freqmap Use file F as the source for frequency map" << endl; cerr << endl; cerr << " As --pitchmap, except that the second column in the file contains frequency" << endl; cerr << " multipliers rather than pitch offsets (the same as the difference between" << endl; - cerr << " pitch and frequency options above)." << endl; - cerr << " This option implies realtime mode (-R)." << endl; + cerr << " pitch and frequency options above). This option implies realtime mode (-R)." << endl; cerr << endl; - cerr << "The following options provide a simple way to adjust the sound. See below" << endl; + cerr << "The following options provide a simple way to adjust the sound. 
See below" << endl; cerr << "for more details." << endl; cerr << endl; cerr << " -c, --crisp Crispness (N = 0,1,2,3,4,5,6); default 5 (see below)" << endl; @@ -254,7 +263,7 @@ int main(int argc, char **argv) cerr << "The remaining options fine-tune the processing mode and stretch algorithm." << endl; cerr << "These are mostly included for test purposes; the default settings and standard" << endl; cerr << "crispness parameter are intended to provide the best sounding set of options" << endl; - cerr << "for most situations. The default is to use none of these options." << endl; + cerr << "for most situations. The default is to use none of these options." << endl; cerr << endl; cerr << " -L, --loose Relax timing in hope of better transient preservation" << endl; cerr << " -P, --precise Ignored: The opposite of -L, this is default from 1.6" << endl; @@ -369,11 +378,16 @@ int main(int argc, char **argv) } std::map freqMap; - if (freqMapFile != "") { - std::ifstream ifile(freqMapFile.c_str()); + if (freqMapFile != "" || pitchMapFile != "") { + std::string file = freqMapFile; + bool convertFromPitch = false; + if (pitchMapFile != "") { + file = pitchMapFile; + convertFromPitch = true; + } + std::ifstream ifile(file.c_str()); if (!ifile.is_open()) { - cerr << "ERROR: Failed to open frequency map file \"" - << freqMapFile << "\"" << endl; + cerr << "ERROR: Failed to open map file \"" << file << "\"" << endl; return 1; } std::string line; @@ -389,13 +403,16 @@ int main(int argc, char **argv) } std::string::size_type i = line.find_first_of(" "); if (i == std::string::npos) { - cerr << "ERROR: Frequency map file \"" << freqMapFile + cerr << "ERROR: Map file \"" << file << "\" is malformed at line " << lineno << endl; return 1; } size_t source = atoi(line.substr(0, i).c_str()); while (i < line.length() && line[i] == ' ') ++i; double freq = atof(line.substr(i).c_str()); + if (convertFromPitch) { + freq = pow(2.0, freq / 12.0); + } freqMap[source] = freq; if (debug > 0) { cerr << 
"adding mapping for source frame " << source << " of frequency multiplier " << freq << endl; @@ -505,12 +522,17 @@ int main(int argc, char **argv) } if (pitchshift != 0.0) { - frequencyshift *= pow(2.0, pitchshift / 12); + frequencyshift *= pow(2.0, pitchshift / 12.0); } cerr << "Using time ratio " << ratio; - cerr << " and frequency ratio " << frequencyshift << endl; + if (freqMap.empty()) { + cerr << " and frequency ratio " << frequencyshift << endl; + } else { + cerr << " and initial frequency ratio " << frequencyshift << endl; + } + #ifdef _WIN32 RubberBand:: #endif @@ -580,14 +602,35 @@ int main(int argc, char **argv) if (!timeMap.empty()) { ts.setKeyFrameMap(timeMap); } + + std::map::const_iterator freqMapItr = freqMap.begin(); size_t countIn = 0, countOut = 0; while (frame < sfinfo.frames) { int count = -1; + int thisBlockSize = ibs; - if ((count = sf_readf_float(sndfile, fbuf, ibs)) < 0) break; + while (freqMapItr != freqMap.end()) { + size_t nextFreqFrame = freqMapItr->first; + if (nextFreqFrame <= countIn) { + double s = frequencyshift * freqMapItr->second; + if (debug > 0) { + cerr << "at frame " << countIn + << " updating frequency ratio to " << s << endl; + } + ts.setPitchScale(s); + ++freqMapItr; + } else { + if (nextFreqFrame < countIn + thisBlockSize) { + thisBlockSize = nextFreqFrame - countIn; + } + break; + } + } + + if ((count = sf_readf_float(sndfile, fbuf, thisBlockSize)) < 0) break; countIn += count; @@ -598,10 +641,10 @@ int main(int argc, char **argv) } } - bool final = (frame + ibs >= sfinfo.frames); + bool final = (frame + thisBlockSize >= sfinfo.frames); if (debug > 2) { - cerr << "count = " << count << ", ibs = " << ibs << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl; + cerr << "count = " << count << ", ibs = " << thisBlockSize << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl; } ts.process(ibuf, count, final); @@ -652,7 +695,7 @@ int main(int argc, 
char **argv) } } - frame += ibs; + frame += thisBlockSize; } if (!quiet) { From 50328a8fd5727e0ef7d249052849ce7da75855be Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 10 Sep 2020 16:23:56 +0100 Subject: [PATCH 03/23] Latency-adjusted --- main/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index d8831b6..a16bef3 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -613,11 +613,11 @@ int main(int argc, char **argv) int thisBlockSize = ibs; while (freqMapItr != freqMap.end()) { - size_t nextFreqFrame = freqMapItr->first; + size_t nextFreqFrame = freqMapItr->first + ts.getLatency(); if (nextFreqFrame <= countIn) { double s = frequencyshift * freqMapItr->second; if (debug > 0) { - cerr << "at frame " << countIn + cerr << "at latency-adjusted frame " << countIn << " updating frequency ratio to " << s << endl; } ts.setPitchScale(s); From 2f1ee7327c9c009616078703c8e50ce4a80f9987 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 10 Sep 2020 17:10:34 +0100 Subject: [PATCH 04/23] Wording --- main/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index a16bef3..f6f8d7d 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -236,8 +236,8 @@ int main(int argc, char **argv) cerr << " A time map (or key-frame map) file contains a series of lines, each with two" << endl; cerr << " sample frame numbers separated by a single space. These are source and" << endl; cerr << " target frames for fixed time points within the audio data, defining a varying" << endl; - cerr << " stretch factor through the audio." << endl; - cerr << " You must specify an overall stretch factor using -t, -T, or -D as well." << endl; + cerr << " stretch factor through the audio. When supplying a time map, you must specify" << endl; + cerr << " an overall stretch factor using -t, -T, or -D as well." 
<< endl; cerr << endl; cerr << " --pitchmap Use file F as the source for pitch map" << endl; cerr << endl; From dab3e9f69b2bc60ef32d28c9118c57e9a5be5b00 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 11 Sep 2020 10:51:03 +0100 Subject: [PATCH 05/23] Force high-consistency pitch-shift mode when using a freq or pitch map --- main/main.cpp | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index f6f8d7d..837b5fa 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -107,6 +107,7 @@ int main(int argc, char **argv) std::string timeMapFile; std::string freqMapFile; std::string pitchMapFile; + bool freqOrPitchMapSpecified = false; enum { NoTransients, @@ -190,8 +191,8 @@ int main(int argc, char **argv) case 'c': crispness = atoi(optarg); break; case 'q': quiet = true; break; case 'M': timeMapFile = optarg; break; - case 'Q': freqMapFile = optarg; break; - case 'C': pitchMapFile = optarg; break; + case 'Q': freqMapFile = optarg; freqOrPitchMapSpecified = true; break; + case 'C': pitchMapFile = optarg; freqOrPitchMapSpecified = true; break; default: help = true; break; } } @@ -201,7 +202,7 @@ int main(int argc, char **argv) return 0; } - if (freqMapFile != "" || pitchMapFile != "") { + if (freqOrPitchMapSpecified) { if (freqMapFile != "" && pitchMapFile != "") { cerr << "ERROR: Please specify either pitch map or frequency map, not both" << endl; return 1; @@ -246,13 +247,14 @@ int main(int argc, char **argv) cerr << " space. These specify a varying pitch factor through the audio. The offsets" << endl; cerr << " are all relative to an initial offset specified by the pitch or frequency" << endl; cerr << " option, or relative to no shift if neither was specified. Offsets are" << endl; - cerr << " not cumulative. This option implies realtime mode (-R)." << endl; + cerr << " not cumulative. 
This option implies realtime mode (-R) and also enables a" << endl; + cerr << " high-consistency pitch shifting mode, appropriate for dynamic pitch changes." << endl; cerr << endl; cerr << " --freqmap Use file F as the source for frequency map" << endl; cerr << endl; cerr << " As --pitchmap, except that the second column in the file contains frequency" << endl; cerr << " multipliers rather than pitch offsets (the same as the difference between" << endl; - cerr << " pitch and frequency options above). This option implies realtime mode (-R)." << endl; + cerr << " pitch and frequency options above)." << endl; cerr << endl; cerr << "The following options provide a simple way to adjust the sound. See below" << endl; cerr << "for more details." << endl; @@ -311,6 +313,12 @@ int main(int argc, char **argv) cerr << " provided -- crispness will override these other options" << endl; } + if (hqpitch && freqOrPitchMapSpecified) { + cerr << "WARNING: High-quality pitch mode selected, but frequency or pitch map file is" << endl; + cerr << " provided -- pitch mode will be overridden by high-consistency mode" << endl; + hqpitch = false; + } + switch (crispness) { case -1: crispness = 5; break; case 0: detector = CompoundDetector; transients = NoTransients; lamination = false; longwin = true; shortwin = false; break; @@ -378,7 +386,8 @@ int main(int argc, char **argv) } std::map freqMap; - if (freqMapFile != "" || pitchMapFile != "") { + + if (freqOrPitchMapSpecified) { std::string file = freqMapFile; bool convertFromPitch = false; if (pitchMapFile != "") { @@ -485,6 +494,10 @@ int main(int argc, char **argv) if (hqpitch) options |= RubberBandStretcher::OptionPitchHighQuality; if (together) options |= RubberBandStretcher::OptionChannelsTogether; + if (freqOrPitchMapSpecified) { + options |= RubberBandStretcher::OptionPitchHighConsistency; + } + switch (threading) { case 0: options |= RubberBandStretcher::OptionThreadingAuto; @@ -527,7 +540,7 @@ int main(int argc, char **argv) cerr 
<< "Using time ratio " << ratio; - if (freqMap.empty()) { + if (!freqOrPitchMapSpecified) { cerr << " and frequency ratio " << frequencyshift << endl; } else { cerr << " and initial frequency ratio " << frequencyshift << endl; From 732d0ed9809e5521b91680ff3ac8f6f8ee99d2c3 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 11 Sep 2020 17:11:03 +0100 Subject: [PATCH 06/23] More informative output --- main/main.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index 837b5fa..53c865c 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -630,8 +630,10 @@ int main(int argc, char **argv) if (nextFreqFrame <= countIn) { double s = frequencyshift * freqMapItr->second; if (debug > 0) { - cerr << "at latency-adjusted frame " << countIn - << " updating frequency ratio to " << s << endl; + cerr << "at frame " << countIn + << " (requested at " << freqMapItr->first + << " plus latency " << ts.getLatency() + << ") updating frequency ratio to " << s << endl; } ts.setPitchScale(s); ++freqMapItr; From 199268815552a628012f0f8468e310086225e5e2 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 15 Sep 2020 11:43:57 +0100 Subject: [PATCH 07/23] Detect out-of-range samples on output and restart with lower gain --- main/main.cpp | 384 +++++++++++++++++++++++++++----------------------- 1 file changed, 211 insertions(+), 173 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index 53c865c..6404fbe 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -121,6 +121,8 @@ int main(int argc, char **argv) SoftDetector } detector = CompoundDetector; + bool ignoreClipping = false; + while (1) { int optionIndex = 0; @@ -155,6 +157,7 @@ int main(int argc, char **argv) { "timemap", 1, 0, 'M' }, { "freqmap", 1, 0, 'Q' }, { "pitchmap", 1, 0, 'C' }, + { "ignore-clipping", 0, 0, 'i' }, { 0, 0, 0, 0 } }; @@ -175,7 +178,7 @@ int main(int argc, char **argv) case 'R': realtime = true; break; case 'L': precise = false; break; case 'P': precise = true; 
break; - case 'F': formant = true; break; + case 'F': formant = true; break; case '0': threading = 1; break; case '@': threading = 2; break; case '1': transients = NoTransients; crispchanged = true; break; @@ -193,6 +196,7 @@ int main(int argc, char **argv) case 'M': timeMapFile = optarg; break; case 'Q': freqMapFile = optarg; freqOrPitchMapSpecified = true; break; case 'C': pitchMapFile = optarg; freqOrPitchMapSpecified = true; break; + case 'i': ignoreClipping = true; break; default: help = true; break; } } @@ -213,11 +217,11 @@ int main(int argc, char **argv) if (help || !haveRatio || optind + 2 != argc) { cerr << endl; - cerr << "Rubber Band" << endl; + cerr << "Rubber Band" << endl; cerr << "An audio time-stretching and pitch-shifting library and utility program." << endl; - cerr << "Copyright 2007-2020 Particular Programs Ltd." << endl; + cerr << "Copyright 2007-2020 Particular Programs Ltd." << endl; cerr << endl; - cerr << " Usage: " << argv[0] << " [options] " << endl; + cerr << " Usage: " << argv[0] << " [options] " << endl; cerr << endl; cerr << "You must specify at least one of the following time and pitch ratio options." << endl; cerr << endl; @@ -260,7 +264,7 @@ int main(int argc, char **argv) cerr << "for more details." << endl; cerr << endl; cerr << " -c, --crisp Crispness (N = 0,1,2,3,4,5,6); default 5 (see below)" << endl; - cerr << " -F, --formant Enable formant preservation when pitch shifting" << endl; + cerr << " -F, --formant Enable formant preservation when pitch shifting" << endl; cerr << endl; cerr << "The remaining options fine-tune the processing mode and stretch algorithm." 
<< endl; cerr << "These are mostly included for test purposes; the default settings and standard" << endl; @@ -283,6 +287,8 @@ int main(int argc, char **argv) cerr << " --pitch-hq In RT mode, use a slower, higher quality pitch shift" << endl; cerr << " --centre-focus Preserve focus of centre material in stereo" << endl; cerr << " (at a cost in width and individual channel quality)" << endl; + cerr << " --ignore-clipping Ignore clipping at output; the default is to restart" << endl; + cerr << " with reduced gain if clipping occurs" << endl; cerr << endl; cerr << " -d, --debug Select debug level (N = 0,1,2,3); default 0, full 3" << endl; cerr << " (N.B. debug level 3 includes audible ticks in output)" << endl; @@ -300,7 +306,7 @@ int main(int argc, char **argv) cerr << " -c 5 default processing options" << endl; cerr << " -c 6 equivalent to --no-lamination --window-short (may be good for drums)" << endl; cerr << endl; - return 2; + return 2; } if (ratio <= 0.0) { @@ -447,9 +453,9 @@ int main(int argc, char **argv) sndfile = sf_open(fileName, SFM_READ, &sfinfo); if (!sndfile) { - cerr << "ERROR: Failed to open input file \"" << fileName << "\": " - << sf_strerror(sndfile) << endl; - return 1; + cerr << "ERROR: Failed to open input file \"" << fileName << "\": " + << sf_strerror(sndfile) << endl; + return 1; } if (sfinfo.samplerate == 0) { @@ -475,9 +481,9 @@ int main(int argc, char **argv) sndfileOut = sf_open(fileNameOut, SFM_WRITE, &sfinfoOut) ; if (!sndfileOut) { - cerr << "ERROR: Failed to open output file \"" << fileNameOut << "\" for writing: " - << sf_strerror(sndfileOut) << endl; - return 1; + cerr << "ERROR: Failed to open output file \"" << fileNameOut << "\" for writing: " + << sf_strerror(sndfileOut) << endl; + return 1; } int ibs = 1024; @@ -551,35 +557,118 @@ int main(int argc, char **argv) #endif timeval tv; (void)gettimeofday(&tv, 0); - + RubberBandStretcher::setDefaultDebugLevel(debug); - RubberBandStretcher ts(sfinfo.samplerate, channels, options, - 
ratio, frequencyshift); + size_t countIn = 0, countOut = 0; - ts.setExpectedInputDuration(sfinfo.frames); + float gain = 1.f; + bool successful = false; - float *fbuf = new float[channels * ibs]; - float **ibuf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) ibuf[i] = new float[ibs]; + while (!successful) { // we may have to repeat with a modified + // gain, if clipping occurs + successful = true; - int frame = 0; - int percent = 0; + RubberBandStretcher ts(sfinfo.samplerate, channels, options, + ratio, frequencyshift); + ts.setExpectedInputDuration(sfinfo.frames); - sf_seek(sndfile, 0, SEEK_SET); - - if (!realtime) { - - if (!quiet) { - cerr << "Pass 1: Studying..." << endl; + float *fbuf = new float[channels * ibs]; + float **ibuf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) { + ibuf[i] = new float[ibs]; } + int frame = 0; + int percent = 0; + + sf_seek(sndfile, 0, SEEK_SET); + + if (!realtime) { + + if (!quiet) { + cerr << "Pass 1: Studying..." << endl; + } + + while (frame < sfinfo.frames) { + + int count = -1; + + if ((count = sf_readf_float(sndfile, fbuf, ibs)) <= 0) break; + + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < count; ++i) { + float value = fbuf[i * channels + c]; + ibuf[c][i] = value; + } + } + + bool final = (frame + ibs >= sfinfo.frames); + + ts.study(ibuf, count, final); + + int p = int((double(frame) * 100.0) / sfinfo.frames); + if (p > percent || frame == 0) { + percent = p; + if (!quiet) { + cerr << "\r" << percent << "% "; + } + } + + frame += ibs; + } + + if (!quiet) { + cerr << "\rCalculating profile..." 
<< endl; + } + + sf_seek(sndfile, 0, SEEK_SET); + } + + frame = 0; + percent = 0; + + if (!timeMap.empty()) { + ts.setKeyFrameMap(timeMap); + } + + std::map::const_iterator freqMapItr = freqMap.begin(); + + countIn = 0; + countOut = 0; + bool clipping = false; + while (frame < sfinfo.frames) { int count = -1; + int thisBlockSize = ibs; - if ((count = sf_readf_float(sndfile, fbuf, ibs)) <= 0) break; + while (freqMapItr != freqMap.end()) { + size_t nextFreqFrame = freqMapItr->first + ts.getLatency(); + if (nextFreqFrame <= countIn) { + double s = frequencyshift * freqMapItr->second; + if (debug > 0) { + cerr << "at frame " << countIn + << " (requested at " << freqMapItr->first + << " plus latency " << ts.getLatency() + << ") updating frequency ratio to " << s << endl; + } + ts.setPitchScale(s); + ++freqMapItr; + } else { + if (nextFreqFrame < countIn + thisBlockSize) { + thisBlockSize = nextFreqFrame - countIn; + } + break; + } + } + if ((count = sf_readf_float(sndfile, fbuf, thisBlockSize)) < 0) { + break; + } + + countIn += count; + for (size_t c = 0; c < channels; ++c) { for (int i = 0; i < count; ++i) { float value = fbuf[i * channels + c]; @@ -587,9 +676,62 @@ int main(int argc, char **argv) } } - bool final = (frame + ibs >= sfinfo.frames); + bool final = (frame + thisBlockSize >= sfinfo.frames); - ts.study(ibuf, count, final); + if (debug > 2) { + cerr << "count = " << count << ", ibs = " << thisBlockSize << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl; + } + + ts.process(ibuf, count, final); + + int avail = ts.available(); + if (debug > 1) cerr << "available = " << avail << endl; + + if (avail > 0) { + float **obf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) { + obf[i] = new float[avail]; + } + ts.retrieve(obf, avail); + countOut += avail; + float *fobf = new float[channels * avail]; + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < avail; ++i) { + float value = gain * obf[c][i]; + 
if (ignoreClipping) { // i.e. just clamp, don't bail out + if (value > 1.f) value = 1.f; + if (value < -1.f) value = -1.f; + } else { + if (value >= 1.f || value < -1.f) { + clipping = true; + gain = (0.999f / fabsf(obf[c][i])); + } + } + fobf[i * channels + c] = value; + } + } + sf_writef_float(sndfileOut, fobf, avail); + delete[] fobf; + for (size_t i = 0; i < channels; ++i) { + delete[] obf[i]; + } + delete[] obf; + } + + if (clipping) { + if (!quiet) { + cerr << "NOTE: Clipping detected at output sample " + << countOut << ", restarting with " + << "reduced gain of " << gain + << " (supply --ignore-clipping to avoid this)" << endl; + } + successful = false; + break; + } + + if (frame == 0 && !realtime && !quiet) { + cerr << "Pass 2: Processing..." << endl; + } int p = int((double(frame) * 100.0) / sfinfo.frames); if (p > percent || frame == 0) { @@ -599,159 +741,55 @@ int main(int argc, char **argv) } } - frame += ibs; + frame += thisBlockSize; } - if (!quiet) { - cerr << "\rCalculating profile..." 
<< endl; + if (!successful) { + sf_seek(sndfile, 0, SEEK_SET); + sf_seek(sndfileOut, 0, SEEK_SET); + continue; } - - sf_seek(sndfile, 0, SEEK_SET); - } - - frame = 0; - percent = 0; - - if (!timeMap.empty()) { - ts.setKeyFrameMap(timeMap); - } - - std::map::const_iterator freqMapItr = freqMap.begin(); - size_t countIn = 0, countOut = 0; + if (!quiet) { + cerr << "\r " << endl; + } + int avail; - while (frame < sfinfo.frames) { + while ((avail = ts.available()) >= 0) { - int count = -1; - int thisBlockSize = ibs; - - while (freqMapItr != freqMap.end()) { - size_t nextFreqFrame = freqMapItr->first + ts.getLatency(); - if (nextFreqFrame <= countIn) { - double s = frequencyshift * freqMapItr->second; - if (debug > 0) { - cerr << "at frame " << countIn - << " (requested at " << freqMapItr->first - << " plus latency " << ts.getLatency() - << ") updating frequency ratio to " << s << endl; + if (debug > 1) { + cerr << "(completing) available = " << avail << endl; + } + + if (avail > 0) { + float **obf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) { + obf[i] = new float[avail]; } - ts.setPitchScale(s); - ++freqMapItr; + ts.retrieve(obf, avail); + countOut += avail; + float *fobf = new float[channels * avail]; + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < avail; ++i) { + float value = gain * obf[c][i]; + if (value > 1.f) value = 1.f; + if (value < -1.f) value = -1.f; + fobf[i * channels + c] = value; + } + } + + sf_writef_float(sndfileOut, fobf, avail); + delete[] fobf; + for (size_t i = 0; i < channels; ++i) { + delete[] obf[i]; + } + delete[] obf; } else { - if (nextFreqFrame < countIn + thisBlockSize) { - thisBlockSize = nextFreqFrame - countIn; - } - break; + usleep(10000); } } - - if ((count = sf_readf_float(sndfile, fbuf, thisBlockSize)) < 0) break; - - countIn += count; - - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < count; ++i) { - float value = fbuf[i * channels + c]; - ibuf[c][i] = value; - } - } - - bool final 
= (frame + thisBlockSize >= sfinfo.frames); - - if (debug > 2) { - cerr << "count = " << count << ", ibs = " << thisBlockSize << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl; - } - - ts.process(ibuf, count, final); - - int avail = ts.available(); - if (debug > 1) cerr << "available = " << avail << endl; - - if (avail > 0) { - float **obf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) { - obf[i] = new float[avail]; - } - ts.retrieve(obf, avail); - countOut += avail; - float *fobf = new float[channels * avail]; - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < avail; ++i) { - float value = obf[c][i]; - if (value > 1.f) value = 1.f; - if (value < -1.f) value = -1.f; - fobf[i * channels + c] = value; - } - } -// cout << "fobf mean: "; -// double d = 0; -// for (int i = 0; i < avail; ++i) { -// d += fobf[i]; -// } -// d /= avail; -// cout << d << endl; - sf_writef_float(sndfileOut, fobf, avail); - delete[] fobf; - for (size_t i = 0; i < channels; ++i) { - delete[] obf[i]; - } - delete[] obf; - } - - if (frame == 0 && !realtime && !quiet) { - cerr << "Pass 2: Processing..." 
<< endl; - } - - int p = int((double(frame) * 100.0) / sfinfo.frames); - if (p > percent || frame == 0) { - percent = p; - if (!quiet) { - cerr << "\r" << percent << "% "; - } - } - - frame += thisBlockSize; - } - - if (!quiet) { - cerr << "\r " << endl; - } - int avail; - - while ((avail = ts.available()) >= 0) { - - if (debug > 1) { - cerr << "(completing) available = " << avail << endl; - } - - if (avail > 0) { - float **obf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) { - obf[i] = new float[avail]; - } - ts.retrieve(obf, avail); - countOut += avail; - float *fobf = new float[channels * avail]; - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < avail; ++i) { - float value = obf[c][i]; - if (value > 1.f) value = 1.f; - if (value < -1.f) value = -1.f; - fobf[i * channels + c] = value; - } - } - - sf_writef_float(sndfileOut, fobf, avail); - delete[] fobf; - for (size_t i = 0; i < channels; ++i) { - delete[] obf[i]; - } - delete[] obf; - } else { - usleep(10000); - } } - + sf_close(sndfile); sf_close(sndfileOut); @@ -762,7 +800,7 @@ int main(int argc, char **argv) #ifdef _WIN32 RubberBand:: #endif - timeval etv; + timeval etv; (void)gettimeofday(&etv, 0); etv.tv_sec -= tv.tv_sec; From 334f1b891d6af62896e67783dc3e47630a27fed0 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 15 Sep 2020 13:43:31 +0100 Subject: [PATCH 08/23] ... 
but don't do so ad infinitum, if some processing problem is causing implausibly high output values --- main/main.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/main/main.cpp b/main/main.cpp index 6404fbe..6369a25 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -725,6 +725,14 @@ int main(int argc, char **argv) << "reduced gain of " << gain << " (supply --ignore-clipping to avoid this)" << endl; } + const float mingain = 0.75f; + if (gain < mingain) { + cerr << "WARNING: Clipped values were implausibly high: " + << "something wrong with input or process - " + << "not reducing gain below " << mingain << endl; + gain = mingain; + ignoreClipping = true; + } successful = false; break; } From de618103f771bfebc6ef7de6b8776ba93de6d607 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 15 Sep 2020 13:46:18 +0100 Subject: [PATCH 09/23] Experiment with small crossfade on resampler rate change --- src/dsp/Resampler.cpp | 91 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 15 deletions(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index d2db61f..48fdbd4 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -609,6 +609,10 @@ D_SRC::resample(const float *const R__ *const R__ in, { SRC_DATA data; + static size_t n_in = 0, n_out = 0; + + std::cerr << "Resampler::process: at input sample " << n_in << ", output sample " << n_out << ", incount = " << incount << ", ratio = " << ratio << ", incount * ratio = " << incount * ratio << std::endl; + int outcount = lrintf(ceilf(incount * ratio)); if (m_channels == 1) { @@ -628,12 +632,57 @@ D_SRC::resample(const float *const R__ *const R__ in, data.data_out = m_iout; } - data.input_frames = incount; - data.output_frames = outcount; data.src_ratio = ratio; - data.end_of_input = (final ? 
1 : 0); - int err = src_process(m_src, &data); +#ifdef PERFORM_LIBSAMPLERATE_XFADE + const int xfade = 10; + int err = 0; + if (ratio != m_lastRatio && outcount > xfade) { + int xin = lrintf(ceilf(xfade / ratio)); + SRC_STATE *xsrc = src_clone(m_src, &err); + float *xbuf = 0; + if (err) { + std::cerr << "Resampler::process: libsamplerate error: " + << src_strerror(err) << ", skipping xfade" << std::endl; + } else { + + data.input_frames = xin; + data.output_frames = xfade; + data.end_of_input = false; + + err = src_process(xsrc, &data); + xbuf = allocate(xfade * m_channels); + v_copy(xbuf, data.data_out, xfade * m_channels); + src_delete(xsrc); + } + + data.input_frames = incount; + data.output_frames = outcount; + data.end_of_input = (final ? 1 : 0); + + src_set_ratio(m_src, ratio); + err = src_process(m_src, &data); + + if (xbuf) { + for (int i = 0; i < xfade; ++i) { + for (int c = 0; c < m_channels; ++c) { + float g = float(i+1) / float(xfade); + float f = data.data_out[i * m_channels + c] * g + + xbuf[i * m_channels + c] * (1.f - g); + data.data_out[i * m_channels + c] = f; + } + } + deallocate(xbuf); + } + } else { +#endif + data.input_frames = incount; + data.output_frames = outcount; + data.end_of_input = (final ? 1 : 0); + err = src_process(m_src, &data); +#ifdef PERFORM_LIBSAMPLERATE_XFADE + } +#endif if (err) { std::cerr << "Resampler::process: libsamplerate error: " @@ -649,6 +698,13 @@ D_SRC::resample(const float *const R__ *const R__ in, m_lastRatio = ratio; + n_in += incount; + n_out += data.output_frames_gen; + + std::cerr << "outcount = " << data.output_frames_gen << std::endl; + +// out[0][0] = 1.0; + return data.output_frames_gen; } @@ -670,7 +726,7 @@ D_SRC::resampleInterleaved(const float *const R__ in, data.output_frames = outcount; data.src_ratio = ratio; data.end_of_input = (final ? 
1 : 0); - + int err = src_process(m_src, &data); if (err) { @@ -942,6 +998,8 @@ D_Speex::D_Speex(Resampler::Quality quality, int channels, int maxBufferSize, << std::endl; } + q = 10; //!!! + int err = 0; m_resampler = speex_resampler_init_frac(m_channels, 1, 1, @@ -1030,7 +1088,10 @@ D_Speex::resample(const float *const R__ *const R__ in, } unsigned int uincount = incount; - unsigned int outcount = lrintf(ceilf(incount * ratio)); //!!! inexact now + + // This doesn't have to be exact, but it does have to be + // sufficient, hence going over by one + unsigned int outcount = lrintf(ceilf((incount + 1) * ratio)); float *data_in, *data_out; @@ -1057,16 +1118,16 @@ D_Speex::resample(const float *const R__ *const R__ in, data_out, &outcount); -// if (incount != int(uincount)) { -// std::cerr << "Resampler: NOTE: Consumed " << uincount -// << " of " << incount << " frames" << std::endl; -// } + if (incount != int(uincount)) { + std::cerr << "Resampler: NOTE: Consumed " << uincount + << " of " << incount << " frames" << std::endl; + } -// if (outcount != lrintf(ceilf(incount * ratio))) { -// std::cerr << "Resampler: NOTE: Obtained " << outcount -// << " of " << lrintf(ceilf(incount * ratio)) << " frames" -// << std::endl; -// } + if (outcount != lrintf(ceilf(incount * ratio))) { + std::cerr << "Resampler: NOTE: Obtained " << outcount + << " of " << lrintf(ceilf(incount * ratio)) << " frames" + << std::endl; + } //!!! 
check err, respond appropriately From 27f4572258704c1acdab2101ef81902d692a229b Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 15 Sep 2020 17:36:21 +0100 Subject: [PATCH 10/23] Fix silly compile error --- src/dsp/Resampler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index 48fdbd4..14dbf28 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -634,9 +634,10 @@ D_SRC::resample(const float *const R__ *const R__ in, data.src_ratio = ratio; + int err = 0; + #ifdef PERFORM_LIBSAMPLERATE_XFADE const int xfade = 10; - int err = 0; if (ratio != m_lastRatio && outcount > xfade) { int xin = lrintf(ceilf(xfade / ratio)); SRC_STATE *xsrc = src_clone(m_src, &err); From 661a1d05eb8ed05658f72f8bd6cbc1556b0cc8f3 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Wed, 16 Sep 2020 17:52:33 +0100 Subject: [PATCH 11/23] It appears this may be all we need - apparently the occasional odd extra sample (due to incremental rounding) added up and eventually overran the filter. I believe +1 is perfectly good here, but we can afford to be more accommodating just in case --- src/dsp/Resampler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index 14dbf28..205fc7f 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -613,7 +613,7 @@ D_SRC::resample(const float *const R__ *const R__ in, std::cerr << "Resampler::process: at input sample " << n_in << ", output sample " << n_out << ", incount = " << incount << ", ratio = " << ratio << ", incount * ratio = " << incount * ratio << std::endl; - int outcount = lrintf(ceilf(incount * ratio)); + int outcount = lrintf(ceilf(incount * ratio) + 10); if (m_channels == 1) { data.data_in = const_cast(*in); //!!!??? 
From d65f5ebe345b92265a1d3aadefe73ccef93cd703 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Mon, 21 Sep 2020 13:48:45 +0100 Subject: [PATCH 12/23] Fix potential discontinuity when window size is reduced due to change in ratio --- src/StretcherProcess.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/StretcherProcess.cpp b/src/StretcherProcess.cpp index 7fc118e..39da682 100644 --- a/src/StretcherProcess.cpp +++ b/src/StretcherProcess.cpp @@ -1018,7 +1018,7 @@ RubberBandStretcher::Impl::synthesiseChunk(size_t channel, m_swindow->cut(fltbuf); v_add(accumulator, fltbuf, wsz); - cd.accumulatorFill = wsz; + cd.accumulatorFill = std::max(cd.accumulatorFill, size_t(wsz)); if (wsz > fsz) { // reuse fltbuf to calculate interpolating window shape for @@ -1041,7 +1041,7 @@ RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, boo float *const R__ accumulator = cd.accumulator; float *const R__ windowAccumulator = cd.windowAccumulator; - const int sz = m_sWindowSize; + const int sz = cd.accumulatorFill; const int si = shiftIncrement; if (m_debugLevel > 2) { From fddcfadd646109a774d3f08ed4a1767c2416e4b9 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 22 Sep 2020 10:32:00 +0100 Subject: [PATCH 13/23] Clarify text about overall stretch factor --- main/main.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index 6369a25..08db475 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -241,18 +241,20 @@ int main(int argc, char **argv) cerr << " A time map (or key-frame map) file contains a series of lines, each with two" << endl; cerr << " sample frame numbers separated by a single space. These are source and" << endl; cerr << " target frames for fixed time points within the audio data, defining a varying" << endl; - cerr << " stretch factor through the audio. 
When supplying a time map, you must specify" << endl; - cerr << " an overall stretch factor using -t, -T, or -D as well." << endl; + cerr << " stretch factor through the audio. When supplying a time map you must specify" << endl; + cerr << " an overall stretch factor using -t, -T, or -D as well, to determine the" << endl; + cerr << " total output duration." << endl; cerr << endl; cerr << " --pitchmap Use file F as the source for pitch map" << endl; cerr << endl; - cerr << " A pitch map file contains a series of lines, each with two values: a" << endl; + cerr << " A pitch map file contains a series of lines, each with two values: the input" << endl; cerr << " sample frame number and a pitch offset in semitones, separated by a single" << endl; cerr << " space. These specify a varying pitch factor through the audio. The offsets" << endl; cerr << " are all relative to an initial offset specified by the pitch or frequency" << endl; cerr << " option, or relative to no shift if neither was specified. Offsets are" << endl; cerr << " not cumulative. This option implies realtime mode (-R) and also enables a" << endl; cerr << " high-consistency pitch shifting mode, appropriate for dynamic pitch changes." << endl; + cerr << " Because of the use of realtime mode, the overall duration will not be exact." << endl; cerr << endl; cerr << " --freqmap Use file F as the source for frequency map" << endl; cerr << endl; From c1108834c198339929d931290202ea9acfad864f Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Tue, 22 Sep 2020 10:35:50 +0100 Subject: [PATCH 14/23] In this emergency, increase the buffer size by a factor rather than only by a constant amount - because if this arises because of an overlong increment being broken down into littler bits, we would end up doing it again and again, which is much more expensive. 
This makes a huge performance difference in cases where a time map ends up mapping very little input into a great expanse of output --- src/StretcherProcess.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/StretcherProcess.cpp b/src/StretcherProcess.cpp index 39da682..d8c912e 100644 --- a/src/StretcherProcess.cpp +++ b/src/StretcherProcess.cpp @@ -521,7 +521,14 @@ RubberBandStretcher::Impl::processChunkForChannel(size_t c, // This is an unhappy situation. RingBuffer *oldbuf = cd.outbuf; - cd.outbuf = oldbuf->resized(oldbuf->getSize() + (required - ws)); + cd.outbuf = oldbuf->resized(oldbuf->getSize() * 2); + + if (m_debugLevel > 1) { + cerr << "(Write space was " << ws << ", needed " << required + << ": resized output buffer from " << oldbuf->getSize() + << " to " << cd.outbuf->getSize() << ")" << endl; + } + m_emergencyScavenger.claim(oldbuf); } From 3ddc35f7aecbcd094de7338c045b08e407c9772c Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Wed, 21 Oct 2020 15:05:14 +0100 Subject: [PATCH 15/23] Resampler code pulled back in from bqresample, with safer api --- src/StretcherImpl.cpp | 18 +- src/StretcherProcess.cpp | 10 +- src/dsp/Resampler.cpp | 670 ++++++++++++++++++++------------------- src/dsp/Resampler.h | 100 ++++-- src/system/Allocators.h | 4 +- 5 files changed, 443 insertions(+), 359 deletions(-) diff --git a/src/StretcherImpl.cpp b/src/StretcherImpl.cpp index cbf8b7b..2863ef8 100644 --- a/src/StretcherImpl.cpp +++ b/src/StretcherImpl.cpp @@ -672,9 +672,12 @@ RubberBandStretcher::Impl::configure() if (m_channelData[c]->resampler) continue; - m_channelData[c]->resampler = - new Resampler(Resampler::FastestTolerable, 1, 4096 * 16, - m_debugLevel); + Resampler::Parameters params; + params.quality = Resampler::FastestTolerable; + params.maxBufferSize = 4096 * 16; + params.debugLevel = m_debugLevel; + + m_channelData[c]->resampler = new Resampler(params, 1); // rbs is the amount of buffer space we think we'll need // for 
resampling; but allocate a sensible amount in case @@ -813,9 +816,12 @@ RubberBandStretcher::Impl::reconfigure() std::cerr << "WARNING: reconfigure(): resampler construction required in RT mode" << std::endl; - m_channelData[c]->resampler = - new Resampler(Resampler::FastestTolerable, 1, m_sWindowSize, - m_debugLevel); + Resampler::Parameters params; + params.quality = Resampler::FastestTolerable; + params.maxBufferSize = m_sWindowSize; + params.debugLevel = m_debugLevel; + + m_channelData[c]->resampler = new Resampler(params, 1); size_t rbs = lrintf(ceil((m_increment * m_timeRatio * 2) / m_pitchScale)); diff --git a/src/StretcherProcess.cpp b/src/StretcherProcess.cpp index d8c912e..b0bafad 100644 --- a/src/StretcherProcess.cpp +++ b/src/StretcherProcess.cpp @@ -217,8 +217,9 @@ RubberBandStretcher::Impl::consumeChannel(size_t c, input = inputs[c] + offset; } - toWrite = cd.resampler->resample(&input, - &cd.resamplebuf, + toWrite = cd.resampler->resample(&cd.resamplebuf, + cd.resamplebufSize, + &input, samples, 1.0 / m_pitchScale, final); @@ -1090,8 +1091,9 @@ RubberBandStretcher::Impl::writeChunk(size_t channel, size_t shiftIncrement, boo #endif #endif - size_t outframes = cd.resampler->resample(&cd.accumulator, - &cd.resamplebuf, + size_t outframes = cd.resampler->resample(&cd.resamplebuf, + cd.resamplebufSize, + &cd.accumulator, si, 1.0 / m_pitchScale, last); diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index 383c5a1..280e817 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -31,6 +31,7 @@ #include #include "system/Allocators.h" +#include "system/VectorOps.h" #ifdef HAVE_IPP #include @@ -65,23 +66,27 @@ #endif #endif +using namespace std; + namespace RubberBand { -class ResamplerImpl +class Resampler::Impl { public: - virtual ~ResamplerImpl() { } + virtual ~Impl() { } - virtual int resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, + virtual int resample(float *const R__ *const R__ out, + int outcount, 
+ const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final) = 0; - virtual int resampleInterleaved(const float *const R__ in, - float *const R__ out, + virtual int resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final) = 0; virtual int getChannelCount() const = 0; @@ -93,23 +98,25 @@ namespace Resamplers { #ifdef HAVE_IPP -class D_IPP : public ResamplerImpl +class D_IPP : public Resampler::Impl { public: - D_IPP(Resampler::Quality quality, int channels, int maxBufferSize, - int debugLevel); + D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, + int maxBufferSize, int debugLevel); ~D_IPP(); - int resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, + int resample(float *const R__ *const R__ out, + int outcount, + const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final); - int resampleInterleaved(const float *const R__ in, - float *const R__ out, + int resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final = false); int getChannelCount() const { return m_channels; } @@ -119,8 +126,9 @@ public: protected: // to m_outbuf int doResample(int outcount, double ratio, bool final); - + IppsResamplingPolyphase_32f **m_state; + double m_initialSampleRate; float **m_inbuf; size_t m_inbufsz; float **m_outbuf; @@ -137,20 +145,23 @@ protected: void setBufSize(int); }; -D_IPP::D_IPP(Resampler::Quality quality, int channels, int maxBufferSize, - int debugLevel) : +D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, + int maxBufferSize, int debugLevel) : m_state(0), + m_initialSampleRate(initialSampleRate), m_channels(channels), m_debugLevel(debugLevel) { if (m_debugLevel > 0) { - std::cerr << "Resampler::Resampler: using IPP implementation" - << std::endl; + 
cerr << "Resampler::Resampler: using IPP implementation" + << endl; } int nStep = 16; IppHintAlgorithm hint = ippAlgHintFast; + //!!! todo: make use of initialSampleRate to calculate parameters + switch (quality) { case Resampler::Best: @@ -173,7 +184,22 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, int maxBufferSize, } m_factor = 8; // initial upper bound on m_ratio, may be amended later - m_history = int(m_window * 0.5 * std::max(1.0, 1.0 / m_factor)) + 1; + + // This is largely based on the IPP docs and examples. Adapted + // from the docs: + // + // m_time defines the time value for which the first output + // sample is calculated. The input vector with indices less + // than m_time [whose initial value is m_history below] + // contains the history data of filters. + // + // The history length is [(1/2) window * max(1, 1/factor) ]+1 + // where window is the size of the ideal lowpass filter + // window. The input vector must contain the same number of + // elements with indices greater than m_time + length for the + // right filter wing for the last element. 
+ + m_history = int(m_window * 0.5 * max(1.0, 1.0 / m_factor)) + 1; m_state = new IppsResamplingPolyphase_32f *[m_channels]; @@ -185,11 +211,11 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, int maxBufferSize, m_inbuf = 0; m_outbuf = 0; m_bufsize = 0; - + setBufSize(maxBufferSize + m_history); if (m_debugLevel > 1) { - std::cerr << "bufsize = " << m_bufsize << ", window = " << m_window << ", nStep = " << nStep << ", history = " << m_history << std::endl; + cerr << "bufsize = " << m_bufsize << ", window = " << m_window << ", nStep = " << nStep << ", history = " << m_history << endl; } #if (IPP_VERSION_MAJOR >= 7) @@ -230,7 +256,7 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, int maxBufferSize, } if (m_debugLevel > 1) { - std::cerr << "Resampler init done" << std::endl; + cerr << "Resampler init done" << endl; } } @@ -258,20 +284,29 @@ void D_IPP::setBufSize(int sz) { if (m_debugLevel > 1) { - std::cerr << "resize bufsize " << m_bufsize << " -> "; + if (m_bufsize > 0) { + cerr << "resize bufsize " << m_bufsize << " -> "; + } else { + cerr << "initialise bufsize to "; + } } m_bufsize = sz; if (m_debugLevel > 1) { - std::cerr << m_bufsize << std::endl; + cerr << m_bufsize << endl; } int n1 = m_bufsize + m_history + 2; - int n2 = lrintf(ceil((m_bufsize - m_history) * m_factor + 2)); if (m_debugLevel > 1) { - std::cerr << "(outbufsize = " << n2 << ")" << std::endl; + cerr << "inbuf allocating " << m_bufsize << " + " << m_history << " + 2 = " << n1 << endl; + } + + int n2 = (int)lrintf(ceil((m_bufsize - m_history) * m_factor + 2)); + + if (m_debugLevel > 1) { + cerr << "outbuf allocating (" << m_bufsize << " - " << m_history << ") * " << m_factor << " + 2 = " << n2 << endl; } m_inbuf = reallocate_and_zero_extend_channels @@ -285,15 +320,20 @@ D_IPP::setBufSize(int sz) } int -D_IPP::resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, +D_IPP::resample(float *const R__ *const R__ out, + int outspace, + const float *const 
R__ *const R__ in, int incount, - float ratio, + double ratio, bool final) { if (ratio > m_factor) { m_factor = ratio; - m_history = int(m_window * 0.5 * std::max(1.0, 1.0 / m_factor)) + 1; + m_history = int(m_window * 0.5 * max(1.0, 1.0 / m_factor)) + 1; + } + + if (m_debugLevel > 2) { + cerr << "incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl; } for (int c = 0; c < m_channels; ++c) { @@ -309,8 +349,12 @@ D_IPP::resample(const float *const R__ *const R__ in, m_lastread[c] += incount; } - int got = doResample(int(round(incount * ratio)), ratio, final); - + if (m_debugLevel > 2) { + cerr << "lastread advanced to " << m_lastread[0] << endl; + } + + int got = doResample(outspace, ratio, final); + for (int c = 0; c < m_channels; ++c) { v_copy(out[c], m_outbuf[c], got); } @@ -319,15 +363,20 @@ D_IPP::resample(const float *const R__ *const R__ in, } int -D_IPP::resampleInterleaved(const float *const R__ in, - float *const R__ out, +D_IPP::resampleInterleaved(float *const R__ out, + int outspace, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final) { if (ratio > m_factor) { m_factor = ratio; - m_history = int(m_window * 0.5 * std::max(1.0, 1.0 / m_factor)) + 1; + m_history = int(m_window * 0.5 * max(1.0, 1.0 / m_factor)) + 1; + } + + if (m_debugLevel > 2) { + cerr << "incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl; } for (int c = 0; c < m_channels; ++c) { @@ -343,7 +392,12 @@ D_IPP::resampleInterleaved(const float *const R__ in, m_lastread[c] += incount; } - int got = doResample(int(round(incount * ratio)), ratio, final); + if (m_debugLevel > 2) { + cerr << "lastread advanced to " << m_lastread[0] << " after injection of " + << incount << " samples" << endl; + } + + int got = doResample(outspace, ratio, 
final); v_interleave(out, m_outbuf, m_channels, got); @@ -360,20 +414,20 @@ D_IPP::doResample(int outspace, double ratio, bool final) int n = m_lastread[c] - m_history - int(m_time[c]); if (c == 0 && m_debugLevel > 2) { - std::cerr << "at start, lastread = " << m_lastread[c] << ", history = " + cerr << "at start, lastread = " << m_lastread[c] << ", history = " << m_history << ", time = " << m_time[c] << ", therefore n = " - << n << std::endl; + << n << endl; } if (n <= 0) { if (c == 0 && m_debugLevel > 1) { - std::cerr << "not enough input samples to do anything" << std::endl; + cerr << "not enough input samples to do anything" << endl; } continue; } if (c == 0 && m_debugLevel > 2) { - std::cerr << "before resample call, time = " << m_time[c] << std::endl; + cerr << "before resample call, time = " << m_time[c] << endl; } // We're committed to not overrunning outspace, so we need to @@ -382,9 +436,9 @@ D_IPP::doResample(int outspace, double ratio, bool final) int limit = int(floor(outspace / ratio)); if (n > limit) { if (c == 0 && m_debugLevel > 1) { - std::cerr << "trimming input samples from " << n << " to " << limit + cerr << "trimming input samples from " << n << " to " << limit << " to avoid overrunning " << outspace << " at output" - << std::endl; + << endl; } n = limit; } @@ -412,11 +466,11 @@ D_IPP::doResample(int outspace, double ratio, bool final) int t = int(round(m_time[c])); if (c == 0 && m_debugLevel > 2) { - std::cerr << "converted " << n << " samples to " << outcount - << ", time advanced to " << t << std::endl; - std::cerr << "will move " << m_lastread[c] + m_history - t + cerr << "converted " << n << " samples to " << outcount + << ", time advanced to " << t << endl; + cerr << "will move " << m_lastread[c] + m_history - t << " unconverted samples back from index " << t - m_history - << " to 0" << std::endl; + << " to 0" << endl; } v_move(m_inbuf[c], @@ -427,9 +481,9 @@ D_IPP::doResample(int outspace, double ratio, bool final) m_time[c] -= t - 
m_history; if (c == 0 && m_debugLevel > 2) { - std::cerr << "lastread reduced to " << m_lastread[c] + cerr << "lastread reduced to " << m_lastread[c] << ", time reduced to " << m_time[c] - << std::endl; + << endl; } if (final && n < limit) { @@ -446,8 +500,8 @@ D_IPP::doResample(int outspace, double ratio, bool final) int additionalcount = 0; if (c == 0 && m_debugLevel > 2) { - std::cerr << "final call, padding input with " << m_history - << " zeros (symmetrical with m_history)" << std::endl; + cerr << "final call, padding input with " << m_history + << " zeros (symmetrical with m_history)" << endl; } for (int i = 0; i < m_history; ++i) { @@ -455,17 +509,17 @@ D_IPP::doResample(int outspace, double ratio, bool final) } if (c == 0 && m_debugLevel > 2) { - std::cerr << "before resample call, time = " << m_time[c] << std::endl; + cerr << "before resample call, time = " << m_time[c] << endl; } int nAdditional = m_lastread[c] - int(m_time[c]); if (n + nAdditional > limit) { if (c == 0 && m_debugLevel > 1) { - std::cerr << "trimming final input samples from " << nAdditional + cerr << "trimming final input samples from " << nAdditional << " to " << (limit - n) << " to avoid overrunning " << outspace << " at output" - << std::endl; + << endl; } nAdditional = limit - n; } @@ -491,9 +545,9 @@ D_IPP::doResample(int outspace, double ratio, bool final) #endif if (c == 0 && m_debugLevel > 2) { - std::cerr << "converted " << n << " samples to " << additionalcount - << ", time advanced to " << m_time[c] << std::endl; - std::cerr << "outcount = " << outcount << ", additionalcount = " << additionalcount << ", sum " << outcount + additionalcount << std::endl; + cerr << "converted " << n << " samples to " << additionalcount + << ", time advanced to " << m_time[c] << endl; + cerr << "outcount = " << outcount << ", additionalcount = " << additionalcount << ", sum " << outcount + additionalcount << endl; } if (c == 0) { @@ -503,7 +557,7 @@ D_IPP::doResample(int outspace, double ratio, 
bool final) } if (m_debugLevel > 2) { - std::cerr << "returning " << outcount << " samples" << std::endl; + cerr << "returning " << outcount << " samples" << endl; } return outcount; @@ -519,23 +573,25 @@ D_IPP::reset() #ifdef HAVE_LIBSAMPLERATE -class D_SRC : public ResamplerImpl +class D_SRC : public Resampler::Impl { public: - D_SRC(Resampler::Quality quality, int channels, int maxBufferSize, - int m_debugLevel); + D_SRC(Resampler::Quality quality, int channels, double initialSampleRate, + int maxBufferSize, int m_debugLevel); ~D_SRC(); - int resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, + int resample(float *const R__ *const R__ out, + int outcount, + const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final); - int resampleInterleaved(const float *const R__ in, - float *const R__ out, + int resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final = false); int getChannelCount() const { return m_channels; } @@ -546,27 +602,25 @@ protected: SRC_STATE *m_src; float *m_iin; float *m_iout; - float m_lastRatio; int m_channels; int m_iinsize; int m_ioutsize; int m_debugLevel; }; -D_SRC::D_SRC(Resampler::Quality quality, int channels, int maxBufferSize, - int debugLevel) : +D_SRC::D_SRC(Resampler::Quality quality, int channels, double, + int maxBufferSize, int debugLevel) : m_src(0), m_iin(0), m_iout(0), - m_lastRatio(1.f), m_channels(channels), m_iinsize(0), m_ioutsize(0), m_debugLevel(debugLevel) { if (m_debugLevel > 0) { - std::cerr << "Resampler::Resampler: using libsamplerate implementation" - << std::endl; + cerr << "Resampler::Resampler: using libsamplerate implementation" + << endl; } int err = 0; @@ -576,8 +630,8 @@ D_SRC::D_SRC(Resampler::Quality quality, int channels, int maxBufferSize, channels, &err); if (err) { - std::cerr << "Resampler::Resampler: failed to create libsamplerate resampler: " - << 
src_strerror(err) << std::endl; + cerr << "Resampler::Resampler: failed to create libsamplerate resampler: " + << src_strerror(err) << endl; #ifndef NO_EXCEPTIONS throw Resampler::ImplementationError; #endif @@ -601,125 +655,45 @@ D_SRC::~D_SRC() } int -D_SRC::resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, +D_SRC::resample(float *const R__ *const R__ out, + int outcount, + const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final) { - SRC_DATA data; - - static size_t n_in = 0, n_out = 0; - - std::cerr << "Resampler::process: at input sample " << n_in << ", output sample " << n_out << ", incount = " << incount << ", ratio = " << ratio << ", incount * ratio = " << incount * ratio << std::endl; - - int outcount = lrintf(ceilf(incount * ratio) + 10); - if (m_channels == 1) { - data.data_in = const_cast<float *>(*in); //!!!??? - data.data_out = *out; - } else { - if (incount * m_channels > m_iinsize) { - m_iin = reallocate(m_iin, m_iinsize, incount * m_channels); - m_iinsize = incount * m_channels; - } - if (outcount * m_channels > m_ioutsize) { - m_iout = reallocate(m_iout, m_ioutsize, outcount * m_channels); - m_ioutsize = outcount * m_channels; - } - v_interleave(m_iin, in, m_channels, incount); - data.data_in = m_iin; - data.data_out = m_iout; + return resampleInterleaved(*out, outcount, *in, incount, ratio, final); } - data.src_ratio = ratio; - - int err = 0; - -#ifdef PERFORM_LIBSAMPLERATE_XFADE - const int xfade = 10; - if (ratio != m_lastRatio && outcount > xfade) { - int xin = lrintf(ceilf(xfade / ratio)); - SRC_STATE *xsrc = src_clone(m_src, &err); - float *xbuf = 0; - if (err) { - std::cerr << "Resampler::process: libsamplerate error: " - << src_strerror(err) << ", skipping xfade" << std::endl; - } else { - - data.input_frames = xin; - data.output_frames = xfade; - data.end_of_input = false; - - err = src_process(xsrc, &data); - xbuf = allocate<float>(xfade * m_channels); - v_copy(xbuf, data.data_out, 
xfade * m_channels); - src_delete(xsrc); - } - - data.input_frames = incount; - data.output_frames = outcount; - data.end_of_input = (final ? 1 : 0); - - src_set_ratio(m_src, ratio); - err = src_process(m_src, &data); - - if (xbuf) { - for (int i = 0; i < xfade; ++i) { - for (int c = 0; c < m_channels; ++c) { - float g = float(i+1) / float(xfade); - float f = data.data_out[i * m_channels + c] * g + - xbuf[i * m_channels + c] * (1.f - g); - data.data_out[i * m_channels + c] = f; - } - } - deallocate(xbuf); - } - } else { -#endif - data.input_frames = incount; - data.output_frames = outcount; - data.end_of_input = (final ? 1 : 0); - err = src_process(m_src, &data); -#ifdef PERFORM_LIBSAMPLERATE_XFADE + if (incount * m_channels > m_iinsize) { + m_iin = reallocate(m_iin, m_iinsize, incount * m_channels); + m_iinsize = incount * m_channels; } -#endif - - if (err) { - std::cerr << "Resampler::process: libsamplerate error: " - << src_strerror(err) << std::endl; -#ifndef NO_EXCEPTIONS - throw Resampler::ImplementationError; -#endif + if (outcount * m_channels > m_ioutsize) { + m_iout = reallocate(m_iout, m_ioutsize, outcount * m_channels); + m_ioutsize = outcount * m_channels; } - - if (m_channels > 1) { - v_deinterleave(out, m_iout, m_channels, data.output_frames_gen); - } - - m_lastRatio = ratio; - - n_in += incount; - n_out += data.output_frames_gen; - - std::cerr << "outcount = " << data.output_frames_gen << std::endl; - -// out[0][0] = 1.0; - return data.output_frames_gen; + v_interleave(m_iin, in, m_channels, incount); + + int n = resampleInterleaved(m_iout, outcount, m_iin, incount, ratio, final); + + v_deinterleave(out, m_iout, m_channels, n); + + return n; } int -D_SRC::resampleInterleaved(const float *const R__ in, - float *const R__ out, +D_SRC::resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final) { SRC_DATA data; - int outcount = lrintf(ceilf(incount * ratio) + 10); - 
data.data_in = const_cast<float *>(in); data.data_out = out; @@ -727,20 +701,18 @@ D_SRC::resampleInterleaved(const float *const R__ in, data.output_frames = outcount; data.src_ratio = ratio; data.end_of_input = (final ? 1 : 0); - + int err = src_process(m_src, &data); if (err) { - std::cerr << "Resampler::process: libsamplerate error: " - << src_strerror(err) << std::endl; + cerr << "Resampler::process: libsamplerate error: " + << src_strerror(err) << endl; #ifndef NO_EXCEPTIONS throw Resampler::ImplementationError; #endif } - m_lastRatio = ratio; - - return data.output_frames_gen; + return (int)data.output_frames_gen; } void @@ -753,23 +725,25 @@ D_SRC::reset() #ifdef HAVE_LIBRESAMPLE -class D_Resample : public ResamplerImpl +class D_Resample : public Resampler::Impl { public: - D_Resample(Resampler::Quality quality, int channels, int maxBufferSize, - int m_debugLevel); + D_Resample(Resampler::Quality quality, int channels, double initialSampleRate, + int maxBufferSize, int m_debugLevel); ~D_Resample(); - int resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, + int resample(float *const R__ *const R__ out, + int outcount, + const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final); - int resampleInterleaved(const float *const R__ in, - float *const R__ out, + int resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final); int getChannelCount() const { return m_channels; } @@ -780,27 +754,26 @@ protected: void *m_src; float *m_iin; float *m_iout; - float m_lastRatio; + double m_lastRatio; int m_channels; int m_iinsize; int m_ioutsize; int m_debugLevel; }; -D_Resample::D_Resample(Resampler::Quality quality, int channels, int maxBufferSize, - int debugLevel) : +D_Resample::D_Resample(Resampler::Quality quality, + int channels, double, int maxBufferSize, int debugLevel) : m_src(0), m_iin(0), m_iout(0), - m_lastRatio(1.f), 
m_channels(channels), m_iinsize(0), m_ioutsize(0), m_debugLevel(debugLevel) { if (m_debugLevel > 0) { - std::cerr << "Resampler::Resampler: using libresample implementation" - << std::endl; + cerr << "Resampler::Resampler: using libresample implementation" + << endl; } float min_factor = 0.125f; @@ -809,8 +782,8 @@ D_Resample::D_Resample(Resampler::Quality quality, int channels, int maxBufferSi m_src = resample_open(quality == Resampler::Best ? 1 : 0, min_factor, max_factor); if (!m_src) { - std::cerr << "Resampler::Resampler: failed to create libresample resampler: " - << std::endl; + cerr << "Resampler::Resampler: failed to create libresample resampler: " + << endl; throw Resampler::ImplementationError; //!!! of course, need to catch this! } @@ -836,10 +809,11 @@ D_Resample::~D_Resample() } int -D_Resample::resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, +D_Resample::resample(float *const R__ *const R__ out, + int outcount, + const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final) { float *data_in; @@ -847,7 +821,7 @@ D_Resample::resample(const float *const R__ *const R__ in, int input_frames, output_frames, end_of_input, source_used; float src_ratio; - int outcount = lrintf(ceilf(incount * ratio)); + int outcount = (int)lrint(ceil(incount * ratio)); if (m_channels == 1) { data_in = const_cast<float *>(*in); //!!!??? @@ -881,8 +855,8 @@ D_Resample::resample(const float *const R__ *const R__ in, output_frames); if (output_frames_gen < 0) { - std::cerr << "Resampler::process: libresample error: " - << std::endl; + cerr << "Resampler::process: libresample error: " + << endl; throw Resampler::ImplementationError; //!!! of course, need to catch this! 
} @@ -890,22 +864,21 @@ D_Resample::resample(const float *const R__ *const R__ in, v_deinterleave(out, m_iout, m_channels, output_frames_gen); } - m_lastRatio = ratio; - return output_frames_gen; } int -D_Resample::resampleInterleaved(const float *const R__ in, - float *const R__ out, +D_Resample::resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final) { int input_frames, output_frames, end_of_input, source_used; float src_ratio; - int outcount = lrintf(ceilf(incount * ratio)); + int outcount = (int)lrint(ceil(incount * ratio)); input_frames = incount; output_frames = outcount; @@ -922,13 +895,11 @@ D_Resample::resampleInterleaved(const float *const R__ in, output_frames); if (output_frames_gen < 0) { - std::cerr << "Resampler::process: libresample error: " - << std::endl; + cerr << "Resampler::process: libresample error: " + << endl; throw Resampler::ImplementationError; //!!! of course, need to catch this! 
} - m_lastRatio = ratio; - return output_frames_gen; } @@ -941,23 +912,25 @@ D_Resample::reset() #ifdef USE_SPEEX -class D_Speex : public ResamplerImpl +class D_Speex : public Resampler::Impl { public: - D_Speex(Resampler::Quality quality, int channels, int maxBufferSize, - int debugLevel); + D_Speex(Resampler::Quality quality, int channels, double initialSampleRate, + int maxBufferSize, int debugLevel); ~D_Speex(); - int resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, + int resample(float *const R__ *const R__ out, + int outcount, + const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final); - int resampleInterleaved(const float *const R__ in, - float *const R__ out, + int resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final = false); int getChannelCount() const { return m_channels; } @@ -966,27 +939,33 @@ public: protected: SpeexResamplerState *m_resampler; + double m_initialSampleRate; float *m_iin; float *m_iout; int m_channels; int m_iinsize; int m_ioutsize; - float m_lastratio; + double m_lastratio; bool m_initial; int m_debugLevel; - void setRatio(float); + void setRatio(double); + void doResample(const float *in, unsigned int &incount, + float *out, unsigned int &outcount, + double ratio, bool final); }; -D_Speex::D_Speex(Resampler::Quality quality, int channels, int maxBufferSize, - int debugLevel) : +D_Speex::D_Speex(Resampler::Quality quality, + int channels, double initialSampleRate, + int maxBufferSize, int debugLevel) : m_resampler(0), + m_initialSampleRate(initialSampleRate), m_iin(0), m_iout(0), m_channels(channels), m_iinsize(0), m_ioutsize(0), - m_lastratio(1), + m_lastratio(-1.0), m_initial(true), m_debugLevel(debugLevel) { @@ -994,24 +973,23 @@ D_Speex::D_Speex(Resampler::Quality quality, int channels, int maxBufferSize, quality == Resampler::Fastest ? 
0 : 4); if (m_debugLevel > 0) { - std::cerr << "Resampler::Resampler: using Speex implementation with q = " - << q - << std::endl; + cerr << "Resampler::Resampler: using Speex implementation with q = " + << q << endl; } - q = 10; //!!! + int rrate = int(round(m_initialSampleRate)); int err = 0; m_resampler = speex_resampler_init_frac(m_channels, 1, 1, - 48000, 48000, // irrelevant + rrate, rrate, q, &err); if (err) { - std::cerr << "Resampler::Resampler: failed to create Speex resampler" - << std::endl; + cerr << "Resampler::Resampler: failed to create Speex resampler" + << endl; #ifndef NO_EXCEPTIONS throw Resampler::ImplementationError; #endif @@ -1033,7 +1011,7 @@ D_Speex::~D_Speex() } void -D_Speex::setRatio(float ratio) +D_Speex::setRatio(double ratio) { // Speex wants a ratio of two unsigned integers, not a single // float. Let's do that. @@ -1052,21 +1030,31 @@ D_Speex::setRatio(float ratio) } if (m_debugLevel > 1) { - std::cerr << "D_Speex: Desired ratio " << ratio << ", requesting ratio " - << num << "/" << denom << " = " << float(double(num)/double(denom)) - << std::endl; + cerr << "D_Speex: Desired ratio " << ratio << ", requesting ratio " + << num << "/" << denom << " = " << float(double(num)/double(denom)) + << endl; } + + int fromRate = int(round(m_initialSampleRate)); + int toRate = int(round(m_initialSampleRate * ratio)); int err = speex_resampler_set_rate_frac - (m_resampler, denom, num, 48000, 48000); - //!!! 
check err + (m_resampler, denom, num, fromRate, toRate); + + if (err) { + cerr << "Resampler::Resampler: failed to set rate on Speex resampler" + << endl; +#ifndef NO_EXCEPTIONS + throw Resampler::ImplementationError; +#endif + } speex_resampler_get_ratio(m_resampler, &denom, &num); if (m_debugLevel > 1) { - std::cerr << "D_Speex: Desired ratio " << ratio << ", got ratio " - << num << "/" << denom << " = " << float(double(num)/double(denom)) - << std::endl; + cerr << "D_Speex: Desired ratio " << ratio << ", got ratio " + << num << "/" << denom << " = " << float(double(num)/double(denom)) + << endl; } m_lastratio = ratio; @@ -1078,10 +1066,11 @@ D_Speex::setRatio(float ratio) } int -D_Speex::resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, +D_Speex::resample(float *const R__ *const R__ out, + int outcount, + const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final) { if (ratio != m_lastratio) { @@ -1089,10 +1078,7 @@ D_Speex::resample(const float *const R__ *const R__ in, } unsigned int uincount = incount; - - // This doesn't have to be exact, but it does have to be - // sufficient, hence going over by one - unsigned int outcount = lrintf(ceilf((incount + 1) * ratio)); + unsigned int uoutcount = outcount; float *data_in, *data_out; @@ -1100,11 +1086,11 @@ D_Speex::resample(const float *const R__ *const R__ in, data_in = const_cast<float *>(*in); data_out = *out; } else { - if (incount * m_channels > m_iinsize) { + if (int(incount * m_channels) > m_iinsize) { m_iin = reallocate(m_iin, m_iinsize, incount * m_channels); m_iinsize = incount * m_channels; } - if (outcount * m_channels > m_ioutsize) { + if (int(outcount * m_channels) > m_ioutsize) { m_iout = reallocate(m_iout, m_ioutsize, outcount * m_channels); m_ioutsize = outcount * m_channels; } @@ -1113,38 +1099,21 @@ D_Speex::resample(const float *const R__ *const R__ in, data_out = m_iout; } - int err = 
speex_resampler_process_interleaved_float(m_resampler, - data_in, - &uincount, - data_out, - &outcount); - - if (incount != int(uincount)) { - std::cerr << "Resampler: NOTE: Consumed " << uincount - << " of " << incount << " frames" << std::endl; - } - - if (outcount != lrintf(ceilf(incount * ratio))) { - std::cerr << "Resampler: NOTE: Obtained " << outcount - << " of " << lrintf(ceilf(incount * ratio)) << " frames" - << std::endl; - } - - //!!! check err, respond appropriately - + doResample(data_in, uincount, data_out, uoutcount, ratio, final); if (m_channels > 1) { - v_deinterleave(out, m_iout, m_channels, outcount); + v_deinterleave(out, m_iout, m_channels, uoutcount); } - return outcount; + return uoutcount; } int -D_Speex::resampleInterleaved(const float *const R__ in, - float *const R__ out, +D_Speex::resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final) { if (ratio != m_lastratio) { @@ -1152,20 +1121,60 @@ D_Speex::resampleInterleaved(const float *const R__ in, } unsigned int uincount = incount; - unsigned int outcount = lrintf(ceilf(incount * ratio)); //!!! 
inexact now + unsigned int uoutcount = outcount; float *data_in = const_cast<float *>(in); float *data_out = out; - int err = speex_resampler_process_interleaved_float(m_resampler, - data_in, - &uincount, - data_out, - &outcount); + doResample(data_in, uincount, data_out, uoutcount, ratio, final); + + return uoutcount; +} -// std::cerr << "D_SPEEX: incount " << incount << " ratio " << ratio << " req " << lrintf(ceilf(incount * ratio)) << " final " << final << " output_frames_gen " << outcount << std::endl; +void +D_Speex::doResample(const float *data_in, unsigned int &uincount, + float *data_out, unsigned int &uoutcount, + double ratio, bool final) +{ + int initial_outcount = int(uoutcount); + + int err = speex_resampler_process_interleaved_float + (m_resampler, + data_in, &uincount, + data_out, &uoutcount); + + if (err) { + cerr << "Resampler::Resampler: Speex resampler returned error " + << err << endl; +#ifndef NO_EXCEPTIONS + throw Resampler::ImplementationError; +#endif + } - return outcount; + if (final) { + int actual = int(uoutcount); + int expected = std::min(initial_outcount, int(round(uincount * ratio))); + if (actual < expected) { + unsigned int final_out = expected - actual; + unsigned int final_in = (unsigned int)(round(final_out / ratio)); + if (final_in > 0) { + float *pad = allocate_and_zero<float>(final_in * m_channels); + err = speex_resampler_process_interleaved_float + (m_resampler, + pad, &final_in, + data_out + actual * m_channels, &final_out); + deallocate(pad); + uoutcount += final_out; + if (err) { + cerr << "Resampler::Resampler: Speex resampler returned error " + << err << endl; +#ifndef NO_EXCEPTIONS + throw Resampler::ImplementationError; +#endif + } + } + } + } } void @@ -1180,12 +1189,15 @@ D_Speex::reset() } /* end namespace Resamplers */ -Resampler::Resampler(Resampler::Quality quality, int channels, - int maxBufferSize, int debugLevel) +Resampler::Resampler(Resampler::Parameters params, int channels) { m_method = -1; + + if 
(params.initialSampleRate == 0) { + params.initialSampleRate = 44100; + } - switch (quality) { + switch (params.quality) { case Resampler::Best: #ifdef HAVE_IPP @@ -1234,63 +1246,63 @@ Resampler::Resampler(Resampler::Quality quality, int channels, } if (m_method == -1) { - std::cerr << "Resampler::Resampler(" << quality << ", " << channels - << ", " << maxBufferSize << "): No implementation available!" - << std::endl; + cerr << "Resampler::Resampler: No implementation available!" << endl; abort(); } switch (m_method) { case 0: #ifdef HAVE_IPP - d = new Resamplers::D_IPP(quality, channels, maxBufferSize, debugLevel); + d = new Resamplers::D_IPP + (params.quality, + channels, + params.initialSampleRate, params.maxBufferSize, params.debugLevel); #else - std::cerr << "Resampler::Resampler(" << quality << ", " << channels - << ", " << maxBufferSize << "): No implementation available!" - << std::endl; + cerr << "Resampler::Resampler: No implementation available!" << endl; abort(); #endif break; case 1: #ifdef HAVE_LIBSAMPLERATE - d = new Resamplers::D_SRC(quality, channels, maxBufferSize, debugLevel); + d = new Resamplers::D_SRC + (params.quality, + channels, + params.initialSampleRate, params.maxBufferSize, params.debugLevel); #else - std::cerr << "Resampler::Resampler(" << quality << ", " << channels - << ", " << maxBufferSize << "): No implementation available!" - << std::endl; + cerr << "Resampler::Resampler: No implementation available!" << endl; abort(); #endif break; case 2: #ifdef USE_SPEEX - d = new Resamplers::D_Speex(quality, channels, maxBufferSize, debugLevel); + d = new Resamplers::D_Speex + (params.quality, + channels, + params.initialSampleRate, params.maxBufferSize, params.debugLevel); #else - std::cerr << "Resampler::Resampler(" << quality << ", " << channels - << ", " << maxBufferSize << "): No implementation available!" - << std::endl; + cerr << "Resampler::Resampler: No implementation available!" 
<< endl; abort(); #endif break; case 3: #ifdef HAVE_LIBRESAMPLE - d = new Resamplers::D_Resample(quality, channels, maxBufferSize, debugLevel); + d = new Resamplers::D_Resample + (params.quality, + channels, + params.initialSampleRate, params.maxBufferSize, params.debugLevel); #else - std::cerr << "Resampler::Resampler(" << quality << ", " << channels - << ", " << maxBufferSize << "): No implementation available!" - << std::endl; + cerr << "Resampler::Resampler: No implementation available!" << endl; abort(); #endif break; } if (!d) { - std::cerr << "Resampler::Resampler(" << quality << ", " << channels - << ", " << maxBufferSize - << "): Internal error: No implementation selected" - << std::endl; + cerr << "Resampler::Resampler: Internal error: No implementation selected" + << endl; abort(); } } @@ -1301,21 +1313,27 @@ Resampler::~Resampler() } int -Resampler::resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, - int incount, float ratio, bool final) +Resampler::resample(float *const R__ *const R__ out, + int outcount, + const float *const R__ *const R__ in, + int incount, + double ratio, + bool final) { Profiler profiler("Resampler::resample"); - return d->resample(in, out, incount, ratio, final); + return d->resample(out, outcount, in, incount, ratio, final); } int -Resampler::resampleInterleaved(const float *const R__ in, - float *const R__ out, - int incount, float ratio, bool final) +Resampler::resampleInterleaved(float *const R__ out, + int outcount, + const float *const R__ in, + int incount, + double ratio, + bool final) { - Profiler profiler("Resampler::resample"); - return d->resampleInterleaved(in, out, incount, ratio, final); + Profiler profiler("Resampler::resampleInterleaved"); + return d->resampleInterleaved(out, outcount, in, incount, ratio, final); } int diff --git a/src/dsp/Resampler.h b/src/dsp/Resampler.h index 5b51400..16b0d06 100644 --- a/src/dsp/Resampler.h +++ b/src/dsp/Resampler.h @@ -21,60 +21,118 @@ you must 
obtain a valid commercial licence before doing so. */ -#ifndef _RUBBERBAND_RESAMPLER_H_ -#define _RUBBERBAND_RESAMPLER_H_ +#ifndef RUBBERBAND_RESAMPLER_H +#define RUBBERBAND_RESAMPLER_H #include "system/sysutils.h" namespace RubberBand { -class ResamplerImpl; - class Resampler { public: enum Quality { Best, FastestTolerable, Fastest }; enum Exception { ImplementationError }; + struct Parameters { + + /** + * Resampler filter quality level. + */ + Quality quality; + + /** + * Rate of expected input prior to resampling: may be used to + * determine the filter bandwidth for the quality setting. If + * you don't know what this will be, you can provide an + * arbitrary rate (such as the default) and the resampler will + * work fine, but quality may not be as designed. + */ + double initialSampleRate; + + /** + * Bound on the maximum incount size that may be passed to the + * resample function before the resampler needs to reallocate + * its internal buffers. Default is zero, so that buffer + * allocation will happen on the first call and any subsequent + * call with a greater incount. + */ + int maxBufferSize; + + /** + * Debug output level, from 0 to 3. Controls the amount of + * debug information printed to stderr. + */ + int debugLevel; + + Parameters() : + quality(FastestTolerable), + initialSampleRate(44100), + maxBufferSize(0), + debugLevel(0) { } + }; + /** - * Construct a resampler with the given quality level and channel - * count. maxBufferSize gives a bound on the maximum incount size - * that may be passed to the resample function before the - * resampler needs to reallocate its internal buffers. + * Construct a resampler to process the given number of channels, + * with the given quality level, initial sample rate, and other + * parameters. 
*/ - Resampler(Quality quality, int channels, int maxBufferSize = 0, - int debugLevel = 0); + Resampler(Parameters parameters, int channels); + ~Resampler(); /** * Resample the given multi-channel buffers, where incount is the - * number of frames in the input buffers. Returns the number of - * frames written to the output buffers. + * number of frames in the input buffers and outspace is the space + * available in the output buffers. Generally you want outspace to + * be at least ceil(incount * ratio). + * + * Returns the number of frames written to the output + * buffers. This may be smaller than outspace even where the ratio + * suggests otherwise, particularly at the start of processing + * where there may be a filter tail to allow for. */ - int resample(const float *const R__ *const R__ in, - float *const R__ *const R__ out, +#ifdef __GNUC__ + __attribute__((warn_unused_result)) +#endif + int resample(float *const R__ *const R__ out, + int outspace, + const float *const R__ *const R__ in, int incount, - float ratio, + double ratio, bool final = false); /** * Resample the given interleaved buffer, where incount is the * number of frames in the input buffer (i.e. it has incount * - * getChannelCount() samples). Returns the number of frames - * written to the output buffer. + * getChannelCount() samples) and outspace is the space available + * in frames in the output buffer (i.e. it has space for at least + * outspace * getChannelCount() samples). Generally you want + * outspace to be at least ceil(incount * ratio). + * + * Returns the number of frames written to the output buffer. This + * may be smaller than outspace even where the ratio suggests + * otherwise, particularly at the start of processing where there + * may be a filter tail to allow for. 
*/ - int resampleInterleaved(const float *const R__ in, - float *const R__ out, +#ifdef __GNUC__ + __attribute__((warn_unused_result)) +#endif + int resampleInterleaved(float *const R__ out, + int outspace, + const float *const R__ in, int incount, - float ratio, + double ratio, bool final = false); int getChannelCount() const; void reset(); + class Impl; + protected: - ResamplerImpl *d; + Impl *d; int m_method; }; diff --git a/src/system/Allocators.h b/src/system/Allocators.h index 2f1fbbf..0783bdc 100644 --- a/src/system/Allocators.h +++ b/src/system/Allocators.h @@ -21,8 +21,8 @@ you must obtain a valid commercial licence before doing so. */ -#ifndef _RUBBERBAND_ALLOCATORS_H_ -#define _RUBBERBAND_ALLOCATORS_H_ +#ifndef RUBBERBAND_ALLOCATORS_H +#define RUBBERBAND_ALLOCATORS_H #include "VectorOps.h" From ce5b79bb450d60a9b36e495c37e0a5035b0eda08 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Wed, 21 Oct 2020 16:50:19 +0100 Subject: [PATCH 16/23] Avoid passing a very oversize outcount for the reasons given --- src/dsp/Resampler.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index 280e817..0b58bc4 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -699,9 +699,23 @@ D_SRC::resampleInterleaved(float *const R__ out, data.input_frames = incount; data.output_frames = outcount; + + // libsamplerate smooths the filter change over the duration of + // the processing block to avoid artifacts due to sudden changes, + // and it uses outcount to determine how long to smooth the change + // over. This is a good thing in principle, but it does mean (a) + // we should never pass outcount significantly longer than the + // actual expected output, and (b) when the ratio has just + // changed, we should aim to supply a shortish block next (this + // part still todo!) 
+ + if (data.output_frames > int(ceil(incount * ratio) + 10)) { + data.output_frames = int(ceil(incount * ratio) + 10); + } + data.src_ratio = ratio; data.end_of_input = (final ? 1 : 0); - + int err = src_process(m_src, &data); if (err) { @@ -711,7 +725,7 @@ D_SRC::resampleInterleaved(float *const R__ out, throw Resampler::ImplementationError; #endif } - + return (int)data.output_frames_gen; } From c4ad5b6f4adaa252a14c283bcee1a17265f645cb Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Wed, 21 Oct 2020 17:16:15 +0100 Subject: [PATCH 17/23] Also avoid passing a genuinely long block when the ratio has just changed --- src/dsp/Resampler.cpp | 57 +++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index 0b58bc4..35d69e5 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -605,6 +605,7 @@ protected: int m_channels; int m_iinsize; int m_ioutsize; + double m_prevRatio; int m_debugLevel; }; @@ -616,6 +617,7 @@ D_SRC::D_SRC(Resampler::Quality quality, int channels, double, m_channels(channels), m_iinsize(0), m_ioutsize(0), + m_prevRatio(1.0), m_debugLevel(debugLevel) { if (m_debugLevel > 0) { @@ -693,25 +695,54 @@ D_SRC::resampleInterleaved(float *const R__ out, bool final) { SRC_DATA data; + + // libsamplerate smooths the filter change over the duration of + // the processing block to avoid artifacts due to sudden changes, + // and it uses outcount to determine how long to smooth the change + // over. 
This is a good thing, but it does mean (a) we should + // never pass outcount significantly longer than the actual + // expected output, and (b) when the ratio has just changed, we + // should aim to supply a shortish block next + + if (outcount > int(ceil(incount * ratio) + 5)) { + outcount = int(ceil(incount * ratio) + 5); + } + + if (ratio != m_prevRatio) { + + // If we are processing a block of appreciable length, turn it + // into two recursive calls, one for the short smoothing block + // and the other for the rest. Update m_prevRatio before doing + // this so that the calls don't themselves recurse! + m_prevRatio = ratio; + + int shortBlock = 200; + if (outcount > shortBlock * 2) { + int shortIn = int(floor(shortBlock / ratio)); + if (shortIn >= 10) { + int shortOut = + resampleInterleaved(out, shortBlock, + in, shortIn, + ratio, false); + int remainingOut = 0; + if (shortOut < outcount) { + remainingOut = + resampleInterleaved(out + shortOut * m_channels, + outcount - shortOut, + in + shortIn * m_channels, + incount - shortIn, + ratio, final); + } + return shortOut + remainingOut; + } + } + } data.data_in = const_cast<float *>(in); data.data_out = out; data.input_frames = incount; data.output_frames = outcount; - - // libsamplerate smooths the filter change over the duration of - // the processing block to avoid artifacts due to sudden changes, - // and it uses outcount to determine how long to smooth the change - // over. This is a good thing in principle, but it does mean (a) - // we should never pass outcount significantly longer than the - // actual expected output, and (b) when the ratio has just - // changed, we should aim to supply a shortish block next (this - // part still todo!) - - if (data.output_frames > int(ceil(incount * ratio) + 10)) { - data.output_frames = int(ceil(incount * ratio) + 10); - } data.src_ratio = ratio; data.end_of_input = (final ? 
1 : 0); From cce7bdd703d0af323fb8e4120656ba228051fca2 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 22 Oct 2020 13:55:50 +0100 Subject: [PATCH 18/23] Fix buffer underrun in case where rounded sample count accumulator exceeds length --- src/dsp/Resampler.cpp | 102 ++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 33 deletions(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index 35d69e5..b2c2d7c 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -215,7 +215,7 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, setBufSize(maxBufferSize + m_history); if (m_debugLevel > 1) { - cerr << "bufsize = " << m_bufsize << ", window = " << m_window << ", nStep = " << nStep << ", history = " << m_history << endl; + cerr << "D_IPP: bufsize = " << m_bufsize << ", window = " << m_window << ", nStep = " << nStep << ", history = " << m_history << endl; } #if (IPP_VERSION_MAJOR >= 7) @@ -249,6 +249,11 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, 9.0f, m_state[c], hint); + + if (m_debugLevel > 1) { + cerr << "D_IPP: Resampler state size = " << specSize << ", allocated at " + << m_state[c] << endl; + } #endif m_lastread[c] = m_history; @@ -256,7 +261,7 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, } if (m_debugLevel > 1) { - cerr << "Resampler init done" << endl; + cerr << "D_IPP: Resampler init done" << endl; } } @@ -285,9 +290,9 @@ D_IPP::setBufSize(int sz) { if (m_debugLevel > 1) { if (m_bufsize > 0) { - cerr << "resize bufsize " << m_bufsize << " -> "; + cerr << "D_IPP: resize bufsize " << m_bufsize << " -> "; } else { - cerr << "initialise bufsize to "; + cerr << "D_IPP: initialise bufsize to "; } } @@ -300,13 +305,13 @@ D_IPP::setBufSize(int sz) int n1 = m_bufsize + m_history + 2; if (m_debugLevel > 1) { - cerr << "inbuf allocating " << m_bufsize << " + " << m_history << " + 2 = " << n1 << endl; + cerr << 
"D_IPP: inbuf allocating " << m_bufsize << " + " << m_history << " + 2 = " << n1 << endl; } int n2 = (int)lrintf(ceil((m_bufsize - m_history) * m_factor + 2)); if (m_debugLevel > 1) { - cerr << "outbuf allocating (" << m_bufsize << " - " << m_history << ") * " << m_factor << " + 2 = " << n2 << endl; + cerr << "D_IPP: outbuf allocating (" << m_bufsize << " - " << m_history << ") * " << m_factor << " + 2 = " << n2 << endl; } m_inbuf = reallocate_and_zero_extend_channels @@ -314,9 +319,24 @@ D_IPP::setBufSize(int sz) m_outbuf = reallocate_and_zero_extend_channels (m_outbuf, m_channels, m_outbufsz, m_channels, n2); - + m_inbufsz = n1; m_outbufsz = n2; + + if (m_debugLevel > 2) { + + cerr << "D_IPP: inbuf ptr = " << m_inbuf << ", channel inbufs "; + for (int c = 0; c < m_channels; ++c) { + cerr << m_inbuf[c] << " "; + } + cerr << "at " << m_inbufsz * sizeof(float) << " bytes each" << endl; + + cerr << "D_IPP: outbuf ptr = " << m_outbuf << ", channel outbufs "; + for (int c = 0; c < m_channels; ++c) { + cerr << m_outbuf[c] << " "; + } + cerr << "at " << m_outbufsz * sizeof(float) << " bytes each" << endl; + } } int @@ -333,7 +353,7 @@ D_IPP::resample(float *const R__ *const R__ out, } if (m_debugLevel > 2) { - cerr << "incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl; + cerr << "D_IPP: incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl; } for (int c = 0; c < m_channels; ++c) { @@ -350,7 +370,7 @@ D_IPP::resample(float *const R__ *const R__ out, } if (m_debugLevel > 2) { - cerr << "lastread advanced to " << m_lastread[0] << endl; + cerr << "D_IPP: lastread advanced to " << m_lastread[0] << endl; } int got = doResample(outspace, ratio, final); @@ -376,7 +396,7 @@ D_IPP::resampleInterleaved(float *const R__ out, } if (m_debugLevel > 2) { - cerr << 
"incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl; + cerr << "D_IPP: incount = " << incount << ", ratio = " << ratio << ", est space = " << lrintf(ceil(incount * ratio)) << ", outspace = " << outspace << ", final = " << final << endl; } for (int c = 0; c < m_channels; ++c) { @@ -393,7 +413,7 @@ D_IPP::resampleInterleaved(float *const R__ out, } if (m_debugLevel > 2) { - cerr << "lastread advanced to " << m_lastread[0] << " after injection of " + cerr << "D_IPP: lastread advanced to " << m_lastread[0] << " after injection of " << incount << " samples" << endl; } @@ -414,20 +434,20 @@ D_IPP::doResample(int outspace, double ratio, bool final) int n = m_lastread[c] - m_history - int(m_time[c]); if (c == 0 && m_debugLevel > 2) { - cerr << "at start, lastread = " << m_lastread[c] << ", history = " + cerr << "D_IPP: at start, lastread = " << m_lastread[c] << ", history = " << m_history << ", time = " << m_time[c] << ", therefore n = " << n << endl; } if (n <= 0) { if (c == 0 && m_debugLevel > 1) { - cerr << "not enough input samples to do anything" << endl; + cerr << "D_IPP: not enough input samples to do anything" << endl; } continue; } if (c == 0 && m_debugLevel > 2) { - cerr << "before resample call, time = " << m_time[c] << endl; + cerr << "D_IPP: before resample call, time = " << m_time[c] << endl; } // We're committed to not overrunning outspace, so we need to @@ -436,7 +456,7 @@ D_IPP::doResample(int outspace, double ratio, bool final) int limit = int(floor(outspace / ratio)); if (n > limit) { if (c == 0 && m_debugLevel > 1) { - cerr << "trimming input samples from " << n << " to " << limit + cerr << "D_IPP: trimming input samples from " << n << " to " << limit << " to avoid overrunning " << outspace << " at output" << endl; } @@ -463,25 +483,41 @@ D_IPP::doResample(int outspace, double ratio, bool final) m_state[c]); #endif - int t = 
int(round(m_time[c])); - + int t = int(floor(m_time[c])); + + int moveFrom = t - m_history; + if (c == 0 && m_debugLevel > 2) { - cerr << "converted " << n << " samples to " << outcount - << ", time advanced to " << t << endl; - cerr << "will move " << m_lastread[c] + m_history - t - << " unconverted samples back from index " << t - m_history + cerr << "D_IPP: converted " << n << " samples to " << outcount + << " (nb outbufsz = " << m_outbufsz + << "), time advanced to " << m_time[c] << endl; + cerr << "D_IPP: rounding time to " << t << ", lastread = " + << m_lastread[c] << ", history = " << m_history << endl; + cerr << "D_IPP: will move " << m_lastread[c] - moveFrom + << " unconverted samples back from index " << moveFrom << " to 0" << endl; } + + if (moveFrom >= m_lastread[c]) { - v_move(m_inbuf[c], - m_inbuf[c] + t - m_history, - m_lastread[c] + m_history - t); + moveFrom = m_lastread[c]; - m_lastread[c] -= t - m_history; - m_time[c] -= t - m_history; + if (c == 0 && m_debugLevel > 2) { + cerr << "D_IPP: number of samples to move is <= 0, " + << "not actually moving any" << endl; + } + } else { + + v_move(m_inbuf[c], + m_inbuf[c] + moveFrom, + m_lastread[c] - moveFrom); + } + + m_lastread[c] -= moveFrom; + m_time[c] -= moveFrom; if (c == 0 && m_debugLevel > 2) { - cerr << "lastread reduced to " << m_lastread[c] + cerr << "D_IPP: lastread reduced to " << m_lastread[c] << ", time reduced to " << m_time[c] << endl; } @@ -500,7 +536,7 @@ D_IPP::doResample(int outspace, double ratio, bool final) int additionalcount = 0; if (c == 0 && m_debugLevel > 2) { - cerr << "final call, padding input with " << m_history + cerr << "D_IPP: final call, padding input with " << m_history << " zeros (symmetrical with m_history)" << endl; } @@ -509,14 +545,14 @@ D_IPP::doResample(int outspace, double ratio, bool final) } if (c == 0 && m_debugLevel > 2) { - cerr << "before resample call, time = " << m_time[c] << endl; + cerr << "D_IPP: before resample call, time = " << m_time[c] << 
endl; } int nAdditional = m_lastread[c] - int(m_time[c]); if (n + nAdditional > limit) { if (c == 0 && m_debugLevel > 1) { - cerr << "trimming final input samples from " << nAdditional + cerr << "D_IPP: trimming final input samples from " << nAdditional << " to " << (limit - n) << " to avoid overrunning " << outspace << " at output" << endl; @@ -545,9 +581,9 @@ D_IPP::doResample(int outspace, double ratio, bool final) #endif if (c == 0 && m_debugLevel > 2) { - cerr << "converted " << n << " samples to " << additionalcount + cerr << "D_IPP: converted " << n << " samples to " << additionalcount << ", time advanced to " << m_time[c] << endl; - cerr << "outcount = " << outcount << ", additionalcount = " << additionalcount << ", sum " << outcount + additionalcount << endl; + cerr << "D_IPP: outcount = " << outcount << ", additionalcount = " << additionalcount << ", sum " << outcount + additionalcount << endl; } if (c == 0) { @@ -557,7 +593,7 @@ D_IPP::doResample(int outspace, double ratio, bool final) } if (m_debugLevel > 2) { - cerr << "returning " << outcount << " samples" << endl; + cerr << "D_IPP: returning " << outcount << " samples" << endl; } return outcount; From 75f9af1f9b1b38a1c16435d4cb590a2f739ab513 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 22 Oct 2020 14:00:32 +0100 Subject: [PATCH 19/23] Remove support for resampler from IPP versions older than v7 --- src/dsp/Resampler.cpp | 43 +------------------------------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index b2c2d7c..ee59f7b 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -36,9 +36,7 @@ #ifdef HAVE_IPP #include #if (IPP_VERSION_MAJOR < 7) -#include -#include -#include +#error Unsupported IPP version, must be >= 7 #else #include #endif @@ -218,7 +216,6 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, cerr << "D_IPP: bufsize = " << m_bufsize << ", window = " << m_window 
<< ", nStep = " << nStep << ", history = " << m_history << endl; } -#if (IPP_VERSION_MAJOR >= 7) int specSize = 0; ippsResamplePolyphaseGetSize_32f(float(m_window), nStep, @@ -231,17 +228,8 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, abort(); #endif } -#endif for (int c = 0; c < m_channels; ++c) { -#if (IPP_VERSION_MAJOR < 7) - ippsResamplePolyphaseInitAlloc_32f(&m_state[c], - float(m_window), - nStep, - 0.95f, - 9.0f, - hint); -#else m_state[c] = (IppsResamplingPolyphase_32f *)ippsMalloc_8u(specSize); ippsResamplePolyphaseInit_32f(float(m_window), nStep, @@ -254,7 +242,6 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, cerr << "D_IPP: Resampler state size = " << specSize << ", allocated at " << m_state[c] << endl; } -#endif m_lastread[c] = m_history; m_time[c] = m_history; @@ -267,15 +254,9 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, D_IPP::~D_IPP() { -#if (IPP_VERSION_MAJOR < 7) - for (int c = 0; c < m_channels; ++c) { - ippsResamplePolyphaseFree_32f(m_state[c]); - } -#else for (int c = 0; c < m_channels; ++c) { ippsFree(m_state[c]); } -#endif deallocate_channels(m_inbuf, m_channels); deallocate_channels(m_outbuf, m_channels); @@ -463,16 +444,6 @@ D_IPP::doResample(int outspace, double ratio, bool final) n = limit; } -#if (IPP_VERSION_MAJOR < 7) - ippsResamplePolyphase_32f(m_state[c], - m_inbuf[c], - n, - m_outbuf[c], - ratio, - 1.0f, - &m_time[c], - &outcount); -#else ippsResamplePolyphase_32f(m_inbuf[c], n, m_outbuf[c], @@ -481,7 +452,6 @@ D_IPP::doResample(int outspace, double ratio, bool final) &m_time[c], &outcount, m_state[c]); -#endif int t = int(floor(m_time[c])); @@ -560,16 +530,6 @@ D_IPP::doResample(int outspace, double ratio, bool final) nAdditional = limit - n; } -#if (IPP_VERSION_MAJOR < 7) - ippsResamplePolyphase_32f(m_state[c], - m_inbuf[c], - nAdditional, - m_outbuf[c], - ratio, - 1.0f, - &m_time[c], - &additionalcount); -#else 
ippsResamplePolyphase_32f(m_inbuf[c], nAdditional, m_outbuf[c], @@ -578,7 +538,6 @@ D_IPP::doResample(int outspace, double ratio, bool final) &m_time[c], &additionalcount, m_state[c]); -#endif if (c == 0 && m_debugLevel > 2) { cerr << "D_IPP: converted " << n << " samples to " << additionalcount From e3e5fe7ae34d438c5769c27161a91c4905e93b01 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 22 Oct 2020 15:55:48 +0100 Subject: [PATCH 20/23] Settle on a single set of params that seems to work ok in tests --- src/dsp/Resampler.cpp | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index ee59f7b..ac1f8eb 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -143,7 +143,8 @@ protected: void setBufSize(int); }; -D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, +D_IPP::D_IPP(Resampler::Quality /* quality */, + int channels, double initialSampleRate, int maxBufferSize, int debugLevel) : m_state(0), m_initialSampleRate(initialSampleRate), @@ -151,36 +152,13 @@ D_IPP::D_IPP(Resampler::Quality quality, int channels, double initialSampleRate, m_debugLevel(debugLevel) { if (m_debugLevel > 0) { - cerr << "Resampler::Resampler: using IPP implementation" - << endl; + cerr << "Resampler::Resampler: using IPP implementation" << endl; } int nStep = 16; IppHintAlgorithm hint = ippAlgHintFast; - - //!!! todo: make use of initialSampleRate to calculate parameters - - switch (quality) { - - case Resampler::Best: - m_window = 64; - nStep = 80; - hint = ippAlgHintAccurate; - break; - - case Resampler::FastestTolerable: - nStep = 16; - m_window = 16; - hint = ippAlgHintFast; - break; - - case Resampler::Fastest: - m_window = 24; - nStep = 64; - hint = ippAlgHintFast; - break; - } - + m_window = 48; + nStep = 80; m_factor = 8; // initial upper bound on m_ratio, may be amended later // This is largely based on the IPP docs and examples. 
Adapted From dbaaf1259825bd4522fd28b09c07fe6c1bc2b672 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Thu, 22 Oct 2020 17:13:55 +0100 Subject: [PATCH 21/23] Hm, those settings weren't as well-behaved as I thought. I wonder if there is some unpleasant randomness here --- src/dsp/Resampler.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index ac1f8eb..c6ad53c 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -155,10 +155,9 @@ D_IPP::D_IPP(Resampler::Quality /* quality */, cerr << "Resampler::Resampler: using IPP implementation" << endl; } - int nStep = 16; + m_window = 32; + int nStep = 64; IppHintAlgorithm hint = ippAlgHintFast; - m_window = 48; - nStep = 80; m_factor = 8; // initial upper bound on m_ratio, may be amended later // This is largely based on the IPP docs and examples. Adapted From 897bd14b8e14246481ef53ffbb1d2c9e633c9494 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Mon, 16 Nov 2020 09:11:12 +0000 Subject: [PATCH 22/23] Avoid any smoothing when setting initial ratio --- src/dsp/Resampler.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/dsp/Resampler.cpp b/src/dsp/Resampler.cpp index c6ad53c..ff38423 100644 --- a/src/dsp/Resampler.cpp +++ b/src/dsp/Resampler.cpp @@ -578,6 +578,7 @@ protected: int m_iinsize; int m_ioutsize; double m_prevRatio; + bool m_ratioUnset; int m_debugLevel; }; @@ -590,6 +591,7 @@ D_SRC::D_SRC(Resampler::Quality quality, int channels, double, m_iinsize(0), m_ioutsize(0), m_prevRatio(1.0), + m_ratioUnset(true), m_debugLevel(debugLevel) { if (m_debugLevel > 0) { @@ -680,7 +682,14 @@ D_SRC::resampleInterleaved(float *const R__ out, outcount = int(ceil(incount * ratio) + 5); } - if (ratio != m_prevRatio) { + if (m_ratioUnset) { + + // The first time we set a ratio, we want to do it directly + src_set_ratio(m_src, ratio); + m_ratioUnset = false; + m_prevRatio = ratio; + + } else if (ratio != m_prevRatio) 
{ // If we are processing a block of appreciable length, turn it // into two recursive calls, one for the short smoothing block @@ -736,6 +745,7 @@ void D_SRC::reset() { src_reset(m_src); + m_ratioUnset = true; } #endif /* HAVE_LIBSAMPLERATE */ From a14d32a680ce2b58e3d0b3bb7ae4603f6ad58db1 Mon Sep 17 00:00:00 2001 From: Chris Cannam Date: Fri, 8 Jan 2021 17:11:18 +0000 Subject: [PATCH 23/23] Remove freqmap, it has its own branch now --- main/main.cpp | 530 +++++++++++++++++--------------------------------- 1 file changed, 180 insertions(+), 350 deletions(-) diff --git a/main/main.cpp b/main/main.cpp index 08db475..8a2aa62 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -35,7 +35,7 @@ #include "system/sysutils.h" -#ifdef __MSVC__ +#ifdef _MSC_VER #include "getopt/getopt.h" #else #include @@ -52,7 +52,7 @@ using namespace RubberBand; using RubberBand::gettimeofday; #endif -#ifdef __MSVC__ +#ifdef _MSC_VER using RubberBand::usleep; #endif @@ -104,10 +104,7 @@ int main(int argc, char **argv) bool haveRatio = false; - std::string timeMapFile; - std::string freqMapFile; - std::string pitchMapFile; - bool freqOrPitchMapSpecified = false; + std::string mapfile; enum { NoTransients, @@ -121,8 +118,6 @@ int main(int argc, char **argv) SoftDetector } detector = CompoundDetector; - bool ignoreClipping = false; - while (1) { int optionIndex = 0; @@ -155,9 +150,6 @@ int main(int argc, char **argv) { "threads", 0, 0, '@' }, { "quiet", 0, 0, 'q' }, { "timemap", 1, 0, 'M' }, - { "freqmap", 1, 0, 'Q' }, - { "pitchmap", 1, 0, 'C' }, - { "ignore-clipping", 0, 0, 'i' }, { 0, 0, 0, 0 } }; @@ -178,7 +170,7 @@ int main(int argc, char **argv) case 'R': realtime = true; break; case 'L': precise = false; break; case 'P': precise = true; break; - case 'F': formant = true; break; + case 'F': formant = true; break; case '0': threading = 1; break; case '@': threading = 2; break; case '1': transients = NoTransients; crispchanged = true; break; @@ -193,10 +185,7 @@ int main(int argc, char 
**argv) case '%': hqpitch = true; break; case 'c': crispness = atoi(optarg); break; case 'q': quiet = true; break; - case 'M': timeMapFile = optarg; break; - case 'Q': freqMapFile = optarg; freqOrPitchMapSpecified = true; break; - case 'C': pitchMapFile = optarg; freqOrPitchMapSpecified = true; break; - case 'i': ignoreClipping = true; break; + case 'M': mapfile = optarg; break; default: help = true; break; } } @@ -206,22 +195,13 @@ int main(int argc, char **argv) return 0; } - if (freqOrPitchMapSpecified) { - if (freqMapFile != "" && pitchMapFile != "") { - cerr << "ERROR: Please specify either pitch map or frequency map, not both" << endl; - return 1; - } - haveRatio = true; - realtime = true; - } - if (help || !haveRatio || optind + 2 != argc) { cerr << endl; - cerr << "Rubber Band" << endl; + cerr << "Rubber Band" << endl; cerr << "An audio time-stretching and pitch-shifting library and utility program." << endl; - cerr << "Copyright 2007-2020 Particular Programs Ltd." << endl; + cerr << "Copyright 2007-2020 Particular Programs Ltd." << endl; cerr << endl; - cerr << " Usage: " << argv[0] << " [options] " << endl; + cerr << " Usage: " << argv[0] << " [options] " << endl; cerr << endl; cerr << "You must specify at least one of the following time and pitch ratio options." << endl; cerr << endl; @@ -233,45 +213,23 @@ int main(int argc, char **argv) cerr << " -p, --pitch Raise pitch by X semitones, or" << endl; cerr << " -f, --frequency Change frequency by multiple X" << endl; cerr << endl; - cerr << "The following options provide ways of making the time and frequency ratios" << endl; - cerr << "change during the audio." << endl; + cerr << " -M, --timemap Use file F as the source for key frame map" << endl; cerr << endl; - cerr << " -M, --timemap Use file F as the source for time map" << endl; + cerr << "A map file consists of a series of lines each having two numbers separated" << endl; + cerr << "by a single space. 
These are source and target sample frame numbers for fixed" << endl; + cerr << "time points within the audio data, defining a varying stretch factor through" << endl; + cerr << "the audio. You must specify an overall stretch factor using e.g. -t as well." << endl; cerr << endl; - cerr << " A time map (or key-frame map) file contains a series of lines, each with two" << endl; - cerr << " sample frame numbers separated by a single space. These are source and" << endl; - cerr << " target frames for fixed time points within the audio data, defining a varying" << endl; - cerr << " stretch factor through the audio. When supplying a time map you must specify" << endl; - cerr << " an overall stretch factor using -t, -T, or -D as well, to determine the" << endl; - cerr << " total output duration." << endl; - cerr << endl; - cerr << " --pitchmap Use file F as the source for pitch map" << endl; - cerr << endl; - cerr << " A pitch map file contains a series of lines, each with two values: the input" << endl; - cerr << " sample frame number and a pitch offset in semitones, separated by a single" << endl; - cerr << " space. These specify a varying pitch factor through the audio. The offsets" << endl; - cerr << " are all relative to an initial offset specified by the pitch or frequency" << endl; - cerr << " option, or relative to no shift if neither was specified. Offsets are" << endl; - cerr << " not cumulative. This option implies realtime mode (-R) and also enables a" << endl; - cerr << " high-consistency pitch shifting mode, appropriate for dynamic pitch changes." << endl; - cerr << " Because of the use of realtime mode, the overall duration will not be exact." 
<< endl; - cerr << endl; - cerr << " --freqmap Use file F as the source for frequency map" << endl; - cerr << endl; - cerr << " As --pitchmap, except that the second column in the file contains frequency" << endl; - cerr << " multipliers rather than pitch offsets (the same as the difference between" << endl; - cerr << " pitch and frequency options above)." << endl; - cerr << endl; - cerr << "The following options provide a simple way to adjust the sound. See below" << endl; + cerr << "The following options provide a simple way to adjust the sound. See below" << endl; cerr << "for more details." << endl; cerr << endl; cerr << " -c, --crisp Crispness (N = 0,1,2,3,4,5,6); default 5 (see below)" << endl; - cerr << " -F, --formant Enable formant preservation when pitch shifting" << endl; + cerr << " -F, --formant Enable formant preservation when pitch shifting" << endl; cerr << endl; cerr << "The remaining options fine-tune the processing mode and stretch algorithm." << endl; cerr << "These are mostly included for test purposes; the default settings and standard" << endl; cerr << "crispness parameter are intended to provide the best sounding set of options" << endl; - cerr << "for most situations. The default is to use none of these options." << endl; + cerr << "for most situations. The default is to use none of these options." 
<< endl; cerr << endl; cerr << " -L, --loose Relax timing in hope of better transient preservation" << endl; cerr << " -P, --precise Ignored: The opposite of -L, this is default from 1.6" << endl; @@ -289,8 +247,6 @@ int main(int argc, char **argv) cerr << " --pitch-hq In RT mode, use a slower, higher quality pitch shift" << endl; cerr << " --centre-focus Preserve focus of centre material in stereo" << endl; cerr << " (at a cost in width and individual channel quality)" << endl; - cerr << " --ignore-clipping Ignore clipping at output; the default is to restart" << endl; - cerr << " with reduced gain if clipping occurs" << endl; cerr << endl; cerr << " -d, --debug Select debug level (N = 0,1,2,3); default 0, full 3" << endl; cerr << " (N.B. debug level 3 includes audible ticks in output)" << endl; @@ -308,7 +264,7 @@ int main(int argc, char **argv) cerr << " -c 5 default processing options" << endl; cerr << " -c 6 equivalent to --no-lamination --window-short (may be good for drums)" << endl; cerr << endl; - return 2; + return 2; } if (ratio <= 0.0) { @@ -321,12 +277,6 @@ int main(int argc, char **argv) cerr << " provided -- crispness will override these other options" << endl; } - if (hqpitch && freqOrPitchMapSpecified) { - cerr << "WARNING: High-quality pitch mode selected, but frequency or pitch map file is" << endl; - cerr << " provided -- pitch mode will be overridden by high-consistency mode" << endl; - hqpitch = false; - } - switch (crispness) { case -1: crispness = 5; break; case 0: detector = CompoundDetector; transients = NoTransients; lamination = false; longwin = true; shortwin = false; break; @@ -352,35 +302,34 @@ int main(int argc, char **argv) cerr << ")" << endl; } - std::map timeMap; - if (timeMapFile != "") { - std::ifstream ifile(timeMapFile.c_str()); + std::map mapping; + + if (mapfile != "") { + std::ifstream ifile(mapfile.c_str()); if (!ifile.is_open()) { - cerr << "ERROR: Failed to open time map file \"" - << timeMapFile << "\"" << endl; + cerr 
<< "ERROR: Failed to open time map file \"" << mapfile << "\"" + << endl; return 1; } std::string line; int lineno = 0; while (!ifile.eof()) { std::getline(ifile, line); - while (line.length() > 0 && line[0] == ' ') { - line = line.substr(1); - } + while (line.length() > 0 && line[0] == ' ') line = line.substr(1); if (line == "") { ++lineno; continue; } std::string::size_type i = line.find_first_of(" "); if (i == std::string::npos) { - cerr << "ERROR: Time map file \"" << timeMapFile + cerr << "ERROR: Time map file \"" << mapfile << "\" is malformed at line " << lineno << endl; return 1; } size_t source = atoi(line.substr(0, i).c_str()); while (i < line.length() && line[i] == ' ') ++i; size_t target = atoi(line.substr(i).c_str()); - timeMap[source] = target; + mapping[source] = target; if (debug > 0) { cerr << "adding mapping from " << source << " to " << target << endl; } @@ -389,57 +338,7 @@ int main(int argc, char **argv) ifile.close(); if (!quiet) { - cerr << "Read " << timeMap.size() << " line(s) from time map file" << endl; - } - } - - std::map freqMap; - - if (freqOrPitchMapSpecified) { - std::string file = freqMapFile; - bool convertFromPitch = false; - if (pitchMapFile != "") { - file = pitchMapFile; - convertFromPitch = true; - } - std::ifstream ifile(file.c_str()); - if (!ifile.is_open()) { - cerr << "ERROR: Failed to open map file \"" << file << "\"" << endl; - return 1; - } - std::string line; - int lineno = 0; - while (!ifile.eof()) { - std::getline(ifile, line); - while (line.length() > 0 && line[0] == ' ') { - line = line.substr(1); - } - if (line == "") { - ++lineno; - continue; - } - std::string::size_type i = line.find_first_of(" "); - if (i == std::string::npos) { - cerr << "ERROR: Map file \"" << file - << "\" is malformed at line " << lineno << endl; - return 1; - } - size_t source = atoi(line.substr(0, i).c_str()); - while (i < line.length() && line[i] == ' ') ++i; - double freq = atof(line.substr(i).c_str()); - if (convertFromPitch) { - freq 
= pow(2.0, freq / 12.0); - } - freqMap[source] = freq; - if (debug > 0) { - cerr << "adding mapping for source frame " << source << " of frequency multiplier " << freq << endl; - } - ++lineno; - } - ifile.close(); - - if (!quiet) { - cerr << "Read " << freqMap.size() << " line(s) from frequency map file" << endl; + cerr << "Read " << mapping.size() << " line(s) from map file" << endl; } } @@ -455,9 +354,9 @@ int main(int argc, char **argv) sndfile = sf_open(fileName, SFM_READ, &sfinfo); if (!sndfile) { - cerr << "ERROR: Failed to open input file \"" << fileName << "\": " - << sf_strerror(sndfile) << endl; - return 1; + cerr << "ERROR: Failed to open input file \"" << fileName << "\": " + << sf_strerror(sndfile) << endl; + return 1; } if (sfinfo.samplerate == 0) { @@ -483,9 +382,9 @@ int main(int argc, char **argv) sndfileOut = sf_open(fileNameOut, SFM_WRITE, &sfinfoOut) ; if (!sndfileOut) { - cerr << "ERROR: Failed to open output file \"" << fileNameOut << "\" for writing: " - << sf_strerror(sndfileOut) << endl; - return 1; + cerr << "ERROR: Failed to open output file \"" << fileNameOut << "\" for writing: " + << sf_strerror(sndfileOut) << endl; + return 1; } int ibs = 1024; @@ -502,10 +401,6 @@ int main(int argc, char **argv) if (hqpitch) options |= RubberBandStretcher::OptionPitchHighQuality; if (together) options |= RubberBandStretcher::OptionChannelsTogether; - if (freqOrPitchMapSpecified) { - options |= RubberBandStretcher::OptionPitchHighConsistency; - } - switch (threading) { case 0: options |= RubberBandStretcher::OptionThreadingAuto; @@ -543,134 +438,46 @@ int main(int argc, char **argv) } if (pitchshift != 0.0) { - frequencyshift *= pow(2.0, pitchshift / 12.0); + frequencyshift *= pow(2.0, pitchshift / 12); } cerr << "Using time ratio " << ratio; + cerr << " and frequency ratio " << frequencyshift << endl; - if (!freqOrPitchMapSpecified) { - cerr << " and frequency ratio " << frequencyshift << endl; - } else { - cerr << " and initial frequency ratio " << 
frequencyshift << endl; - } - #ifdef _WIN32 RubberBand:: #endif timeval tv; (void)gettimeofday(&tv, 0); - + RubberBandStretcher::setDefaultDebugLevel(debug); - size_t countIn = 0, countOut = 0; + RubberBandStretcher ts(sfinfo.samplerate, channels, options, + ratio, frequencyshift); - float gain = 1.f; - bool successful = false; + ts.setExpectedInputDuration(sfinfo.frames); - while (!successful) { // we may have to repeat with a modified - // gain, if clipping occurs - successful = true; + float *fbuf = new float[channels * ibs]; + float **ibuf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) ibuf[i] = new float[ibs]; - RubberBandStretcher ts(sfinfo.samplerate, channels, options, - ratio, frequencyshift); - ts.setExpectedInputDuration(sfinfo.frames); + int frame = 0; + int percent = 0; - float *fbuf = new float[channels * ibs]; - float **ibuf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) { - ibuf[i] = new float[ibs]; + sf_seek(sndfile, 0, SEEK_SET); + + if (!realtime) { + + if (!quiet) { + cerr << "Pass 1: Studying..." << endl; } - int frame = 0; - int percent = 0; - - sf_seek(sndfile, 0, SEEK_SET); - - if (!realtime) { - - if (!quiet) { - cerr << "Pass 1: Studying..." << endl; - } - - while (frame < sfinfo.frames) { - - int count = -1; - - if ((count = sf_readf_float(sndfile, fbuf, ibs)) <= 0) break; - - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < count; ++i) { - float value = fbuf[i * channels + c]; - ibuf[c][i] = value; - } - } - - bool final = (frame + ibs >= sfinfo.frames); - - ts.study(ibuf, count, final); - - int p = int((double(frame) * 100.0) / sfinfo.frames); - if (p > percent || frame == 0) { - percent = p; - if (!quiet) { - cerr << "\r" << percent << "% "; - } - } - - frame += ibs; - } - - if (!quiet) { - cerr << "\rCalculating profile..." 
<< endl; - } - - sf_seek(sndfile, 0, SEEK_SET); - } - - frame = 0; - percent = 0; - - if (!timeMap.empty()) { - ts.setKeyFrameMap(timeMap); - } - - std::map::const_iterator freqMapItr = freqMap.begin(); - - countIn = 0; - countOut = 0; - bool clipping = false; - while (frame < sfinfo.frames) { int count = -1; - int thisBlockSize = ibs; - while (freqMapItr != freqMap.end()) { - size_t nextFreqFrame = freqMapItr->first + ts.getLatency(); - if (nextFreqFrame <= countIn) { - double s = frequencyshift * freqMapItr->second; - if (debug > 0) { - cerr << "at frame " << countIn - << " (requested at " << freqMapItr->first - << " plus latency " << ts.getLatency() - << ") updating frequency ratio to " << s << endl; - } - ts.setPitchScale(s); - ++freqMapItr; - } else { - if (nextFreqFrame < countIn + thisBlockSize) { - thisBlockSize = nextFreqFrame - countIn; - } - break; - } - } + if ((count = sf_readf_float(sndfile, fbuf, ibs)) <= 0) break; - if ((count = sf_readf_float(sndfile, fbuf, thisBlockSize)) < 0) { - break; - } - - countIn += count; - for (size_t c = 0; c < channels; ++c) { for (int i = 0; i < count; ++i) { float value = fbuf[i * channels + c]; @@ -678,70 +485,9 @@ int main(int argc, char **argv) } } - bool final = (frame + thisBlockSize >= sfinfo.frames); + bool final = (frame + ibs >= sfinfo.frames); - if (debug > 2) { - cerr << "count = " << count << ", ibs = " << thisBlockSize << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl; - } - - ts.process(ibuf, count, final); - - int avail = ts.available(); - if (debug > 1) cerr << "available = " << avail << endl; - - if (avail > 0) { - float **obf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) { - obf[i] = new float[avail]; - } - ts.retrieve(obf, avail); - countOut += avail; - float *fobf = new float[channels * avail]; - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < avail; ++i) { - float value = gain * obf[c][i]; - if (ignoreClipping) { // i.e. 
just clamp, don't bail out - if (value > 1.f) value = 1.f; - if (value < -1.f) value = -1.f; - } else { - if (value >= 1.f || value < -1.f) { - clipping = true; - gain = (0.999f / fabsf(obf[c][i])); - } - } - fobf[i * channels + c] = value; - } - } - sf_writef_float(sndfileOut, fobf, avail); - delete[] fobf; - for (size_t i = 0; i < channels; ++i) { - delete[] obf[i]; - } - delete[] obf; - } - - if (clipping) { - if (!quiet) { - cerr << "NOTE: Clipping detected at output sample " - << countOut << ", restarting with " - << "reduced gain of " << gain - << " (supply --ignore-clipping to avoid this)" << endl; - } - const float mingain = 0.75f; - if (gain < mingain) { - cerr << "WARNING: Clipped values were implausibly high: " - << "something wrong with input or process - " - << "not reducing gain below " << mingain << endl; - gain = mingain; - ignoreClipping = true; - } - successful = false; - break; - } - - if (frame == 0 && !realtime && !quiet) { - cerr << "Pass 2: Processing..." << endl; - } + ts.study(ibuf, count, final); int p = int((double(frame) * 100.0) / sfinfo.frames); if (p > percent || frame == 0) { @@ -751,55 +497,136 @@ int main(int argc, char **argv) } } - frame += thisBlockSize; + frame += ibs; } - if (!successful) { - sf_seek(sndfile, 0, SEEK_SET); - sf_seek(sndfileOut, 0, SEEK_SET); - continue; - } - if (!quiet) { - cerr << "\r " << endl; + cerr << "\rCalculating profile..." 
<< endl; } - int avail; - while ((avail = ts.available()) >= 0) { + sf_seek(sndfile, 0, SEEK_SET); + } - if (debug > 1) { - cerr << "(completing) available = " << avail << endl; - } - - if (avail > 0) { - float **obf = new float *[channels]; - for (size_t i = 0; i < channels; ++i) { - obf[i] = new float[avail]; - } - ts.retrieve(obf, avail); - countOut += avail; - float *fobf = new float[channels * avail]; - for (size_t c = 0; c < channels; ++c) { - for (int i = 0; i < avail; ++i) { - float value = gain * obf[c][i]; - if (value > 1.f) value = 1.f; - if (value < -1.f) value = -1.f; - fobf[i * channels + c] = value; - } - } - - sf_writef_float(sndfileOut, fobf, avail); - delete[] fobf; - for (size_t i = 0; i < channels; ++i) { - delete[] obf[i]; - } - delete[] obf; - } else { - usleep(10000); - } - } + frame = 0; + percent = 0; + + if (!mapping.empty()) { + ts.setKeyFrameMap(mapping); } + size_t countIn = 0, countOut = 0; + + while (frame < sfinfo.frames) { + + int count = -1; + + if ((count = sf_readf_float(sndfile, fbuf, ibs)) < 0) break; + + countIn += count; + + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < count; ++i) { + float value = fbuf[i * channels + c]; + ibuf[c][i] = value; + } + } + + bool final = (frame + ibs >= sfinfo.frames); + + if (debug > 2) { + cerr << "count = " << count << ", ibs = " << ibs << ", frame = " << frame << ", frames = " << sfinfo.frames << ", final = " << final << endl; + } + + ts.process(ibuf, count, final); + + int avail = ts.available(); + if (debug > 1) cerr << "available = " << avail << endl; + + if (avail > 0) { + float **obf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) { + obf[i] = new float[avail]; + } + ts.retrieve(obf, avail); + countOut += avail; + float *fobf = new float[channels * avail]; + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < avail; ++i) { + float value = obf[c][i]; + if (value > 1.f) value = 1.f; + if (value < -1.f) value = -1.f; + fobf[i * channels + c] = 
value; + } + } +// cout << "fobf mean: "; +// double d = 0; +// for (int i = 0; i < avail; ++i) { +// d += fobf[i]; +// } +// d /= avail; +// cout << d << endl; + sf_writef_float(sndfileOut, fobf, avail); + delete[] fobf; + for (size_t i = 0; i < channels; ++i) { + delete[] obf[i]; + } + delete[] obf; + } + + if (frame == 0 && !realtime && !quiet) { + cerr << "Pass 2: Processing..." << endl; + } + + int p = int((double(frame) * 100.0) / sfinfo.frames); + if (p > percent || frame == 0) { + percent = p; + if (!quiet) { + cerr << "\r" << percent << "% "; + } + } + + frame += ibs; + } + + if (!quiet) { + cerr << "\r " << endl; + } + int avail; + + while ((avail = ts.available()) >= 0) { + + if (debug > 1) { + cerr << "(completing) available = " << avail << endl; + } + + if (avail > 0) { + float **obf = new float *[channels]; + for (size_t i = 0; i < channels; ++i) { + obf[i] = new float[avail]; + } + ts.retrieve(obf, avail); + countOut += avail; + float *fobf = new float[channels * avail]; + for (size_t c = 0; c < channels; ++c) { + for (int i = 0; i < avail; ++i) { + float value = obf[c][i]; + if (value > 1.f) value = 1.f; + if (value < -1.f) value = -1.f; + fobf[i * channels + c] = value; + } + } + + sf_writef_float(sndfileOut, fobf, avail); + delete[] fobf; + for (size_t i = 0; i < channels; ++i) { + delete[] obf[i]; + } + delete[] obf; + } else { + usleep(10000); + } + } + sf_close(sndfile); sf_close(sndfileOut); @@ -810,7 +637,7 @@ int main(int argc, char **argv) #ifdef _WIN32 RubberBand:: #endif - timeval etv; + timeval etv; (void)gettimeofday(&etv, 0); etv.tv_sec -= tv.tv_sec; @@ -821,7 +648,10 @@ int main(int argc, char **argv) etv.tv_usec -= tv.tv_usec; double sec = double(etv.tv_sec) + (double(etv.tv_usec) / 1000000.0); - cerr << "elapsed time: " << sec << " sec, in frames/sec: " << countIn/sec << ", out frames/sec: " << countOut/sec << endl; + cerr << "elapsed time: " << sec + << " sec, in frames/sec: " << int64_t(round(countIn/sec)) + << ", out frames/sec: 
" << int64_t(round(countOut/sec)) + << endl; } RubberBand::Profiler::dump();