Now update MovingMedian behaviour to match bsq code (i.e. make it "more" correct)

This commit is contained in:
Chris Cannam
2022-06-10 16:39:32 +01:00
parent 0bfa94a76a
commit 5dcc499cf9
3 changed files with 150 additions and 71 deletions

View File

@@ -44,8 +44,8 @@ class HistogramFilter
public: public:
HistogramFilter(int nValues, int filterLength) : HistogramFilter(int nValues, int filterLength) :
m_buffer(filterLength), m_buffer(filterLength),
m_histogram(nValues, 0) { m_histogram(nValues, 0),
} m_mode(-1) { }
~HistogramFilter() { } ~HistogramFilter() { }
int getFilterLength() const { int getFilterLength() const {
@@ -65,13 +65,21 @@ public:
--m_histogram[toDrop]; --m_histogram[toDrop];
} }
m_buffer.writeOne(value); m_buffer.writeOne(value);
++m_histogram[value]; int height = ++m_histogram[value];
if (m_mode >= 0 && height >= m_histogram[m_mode]) {
if (height > m_histogram[m_mode] || value < m_mode) {
m_mode = value;
}
}
} }
void drop() { void drop() {
if (m_buffer.getReadSpace() > 0) { if (m_buffer.getReadSpace() > 0) {
int toDrop = m_buffer.readOne(); int toDrop = m_buffer.readOne();
--m_histogram[toDrop]; --m_histogram[toDrop];
if (toDrop == m_mode) {
m_mode = -1;
}
} }
} }
@@ -95,6 +103,9 @@ public:
* an equal number of times, return the smallest of them. * an equal number of times, return the smallest of them.
*/ */
int getMode() const { int getMode() const {
if (m_mode >= 0) {
return m_mode;
}
int max = 0; int max = 0;
int mode = 0; int mode = 0;
for (int i = 0; i < m_histogram.size(); ++i) { for (int i = 0; i < m_histogram.size(); ++i) {
@@ -104,6 +115,7 @@ public:
mode = i; mode = i;
} }
} }
m_mode = mode;
return mode; return mode;
} }
@@ -168,7 +180,7 @@ public:
private: private:
SingleThreadRingBuffer<int> m_buffer; SingleThreadRingBuffer<int> m_buffer;
std::vector<int> m_histogram; std::vector<int> m_histogram;
int m_mode; mutable int m_mode;
}; };
} }

View File

@@ -37,19 +37,27 @@
namespace RubberBand namespace RubberBand
{ {
/**
* A median or modal filter implemented using sorting, for continuous
* values. Pushing a value updates the sorted record, after which you
* can get() the median. You can call drop() to drop the oldest value
* without pushing a new one, for example to drain the filter at the
* tail of the sequence.
*/
template <typename T> template <typename T>
class MovingMedian : public SampleFilter<T> class MovingMedian : public SampleFilter<T>
{ {
static constexpr float fifty = 50.f;
public: public:
MovingMedian(int filterLength, float percentile = 50.f) : MovingMedian(int filterLength, float percentile = fifty) :
m_buffer(filterLength), m_buffer(filterLength),
m_sortspace(filterLength, {}) m_sortspace(filterLength, {}),
{ m_fill(0),
setPercentile(percentile); m_percentile(percentile)
} { }
~MovingMedian() { ~MovingMedian() { }
}
MovingMedian(const MovingMedian &) =default; MovingMedian(const MovingMedian &) =default;
MovingMedian &operator=(const MovingMedian &) =default; MovingMedian &operator=(const MovingMedian &) =default;
@@ -57,31 +65,44 @@ public:
int getSize() const { int getSize() const {
return m_buffer.getSize(); return m_buffer.getSize();
} }
void setPercentile(float p) {
int length = getSize();
m_index = int((length * p) / 100.f);
if (m_index >= length) m_index = length-1;
if (m_index < 0) m_index = 0;
}
void setPercentile(float p) {
m_percentile = p;
}
void push(T value) { void push(T value) {
if (value != value) { if (value != value) {
std::cerr << "WARNING: MovingMedian: NaN encountered" << std::endl; std::cerr << "WARNING: MovingMedian: NaN encountered" << std::endl;
value = T(); value = T();
} }
if (m_buffer.getWriteSpace() == 0) { if (m_fill == getSize()) {
T toDrop = m_buffer.readOne(); T toDrop = m_buffer.readOne();
dropAndPut(toDrop, value); dropAndPut(toDrop, value);
m_buffer.writeOne(value);
} else { } else {
put(value); put(value);
m_buffer.writeOne(value);
} }
m_buffer.writeOne(value);
} }
void drop() {
if (m_fill > 0) {
T toDrop = m_buffer.readOne();
drop(toDrop);
}
}
/** Return the median of the values in the filter currently. If
* the median lies between two values, return the first of them.
*/
T get() const { T get() const {
return m_sortspace[m_index]; int n = m_fill - 1;
if (m_percentile == fifty) { // exact default value
return m_sortspace[n / 2];
} else {
int index = float(n) * m_percentile / 100.f;
if (index >= m_fill) index = m_fill - 1;
return m_sortspace[index];
}
} }
void reset() { void reset() {
@@ -93,26 +114,22 @@ public:
// in-place. Array has length n. Modifies both the filter and the // in-place. Array has length n. Modifies both the filter and the
// array. // array.
// //
static void filter(MovingMedian<T> &mm, T *v, int n) { static void filter(MovingMedian<T> &f, T *v, int n) {
int fn = mm.getSize(); f.reset();
int lag = fn / 2; int flen = f.getSize();
mm.reset(); int i = -(flen / 2);
int i = 0; int j = 0;
for (; i < lag; ++i) { while (i != n) {
if (i < n) mm.push(v[i]); if (j < n) {
} f.push(v[j]);
for (; i < n; ++i) { } else if (j >= flen) {
mm.push(v[i]); f.drop();
v[i-lag] = mm.get(); }
} if (i >= 0) {
for (; i < lag; ++i) { v[i] = f.get();
// just for the unusual case where lag > n }
mm.push(T()); ++i;
(void)mm.get(); ++j;
}
for (; i < n + lag; ++i) {
mm.push(T());
v[i-lag] = mm.get();
} }
} }
@@ -125,18 +142,26 @@ public:
private: private:
SingleThreadRingBuffer<T> m_buffer; SingleThreadRingBuffer<T> m_buffer;
std::vector<T> m_sortspace; std::vector<T> m_sortspace;
int m_index; int m_fill;
float m_percentile;
void dropAndPut(const T &toDrop, const T &toPut) { void dropAndPut(const T &toDrop, const T &toPut) {
// precondition: sorted contains m_length values, one of which is toDrop // precondition: sorted contains getSize values, one of which is toDrop
// postcondition: sorted contains m_length values, one of which is toPut // postcondition: sorted contains getSize values, one of which is toPut
// (and one instance of toDrop has been removed) // (and one instance of toDrop has been removed)
// This implementation was timed for rather short filters (no // This implementation was timed for rather short filters (no
// longer than maybe 16 items). Two binary searches plus a // longer than maybe 16 items). Two binary searches plus a
// memmove should be faster for longer ones. // memmove should be faster for longer ones.
const int n = getSize(); const int n = m_fill;
#ifdef DEBUG_MM
if (n != getSize()) {
throw std::logic_error("length mismatch");
}
#endif
T *sorted = m_sortspace.data(); T *sorted = m_sortspace.data();
int dropIx; int dropIx;
if (toDrop <= *sorted) { if (toDrop <= *sorted) {
@@ -158,7 +183,7 @@ private:
std::cout << "toDrop = " << toDrop << ", dropIx = " << dropIx << std::endl; std::cout << "toDrop = " << toDrop << ", dropIx = " << dropIx << std::endl;
std::cout << "toPut = " << toPut << std::endl; std::cout << "toPut = " << toPut << std::endl;
if (sorted[dropIx] != toDrop) { if (sorted[dropIx] != toDrop) {
throw std::logic_error("element not found"); throw std::logic_error("element not found (a)");
} }
#endif #endif
@@ -198,14 +223,15 @@ private:
} }
void put(const T &toPut) { void put(const T &toPut) {
// precondition: sorted contains fewer than m_length values, // precondition: sorted contains m_fill values, m_fill < m_length,
// packed at the start // packed at the start
// postcondition: sorted contains up to m_length values, // postcondition: m_fill is incremented, sorted contains m_fill values,
// packed at the start, one of which is toPut // packed at the start, one of which is toPut
const int n = m_buffer.getReadSpace(); // items in sorted
int n = m_fill;
#ifdef DEBUG_MM #ifdef DEBUG_MM
if (n >= m_length) { if (n >= getSize()) {
throw std::logic_error("length mismatch"); throw std::logic_error("length mismatch");
} }
#endif #endif
@@ -229,27 +255,81 @@ private:
} }
sorted[putIx] = toPut; sorted[putIx] = toPut;
++m_fill;
#ifdef DEBUG_MM #ifdef DEBUG_MM
std::cout << "after: ["; std::cout << "after: [";
for (int i = 0; i < n + 1; ++i) { for (int i = 0; i < m_fill; ++i) {
if (i > 0) std::cout << ","; if (i > 0) std::cout << ",";
std::cout << sorted[i]; std::cout << sorted[i];
} }
std::cout << "]" << std::endl; std::cout << "]" << std::endl;
if (!std::is_sorted(sorted, sorted + n)) { if (!std::is_sorted(sorted, sorted + m_fill)) {
throw std::logic_error("array is not sorted"); throw std::logic_error("array is not sorted");
} }
#endif #endif
} }
void drop(const T &toDrop) {
// precondition: sorted contains m_fill values, m_fill > 0, <= m_length,
// packed at the start, one of which is toDrop
// postcondition: m_fill decremented, sorted contains m_fill values,
// packed at the start, none of which is toDrop
int n = m_fill;
#ifdef DEBUG_MM
if (n <= 0 || n > getSize()) {
throw std::logic_error("length mismatch");
}
#endif
T *sorted = m_sortspace.data();
int dropIx = std::lower_bound(sorted, sorted + n, toDrop) - sorted;
#ifdef DEBUG_MM
std::cout << "\nbefore: [";
for (int i = 0; i < n; ++i) {
if (i > 0) std::cout << ",";
std::cout << sorted[i];
}
std::cout << "]" << std::endl;
std::cout << "toDrop = " << toDrop << ", dropIx = " << dropIx << std::endl;
if (sorted[dropIx] != toDrop) {
throw std::logic_error("element not found (b)");
}
#endif
if (dropIx < n - 1) {
v_move(sorted + dropIx, sorted + dropIx + 1, n - dropIx - 1);
}
--m_fill;
#ifdef DEBUG_MM
std::cout << "after: [";
for (int i = 0; i < m_fill; ++i) {
if (i > 0) std::cout << ",";
std::cout << sorted[i];
}
std::cout << "]" << std::endl;
if (!std::is_sorted(sorted, sorted + m_fill)) {
throw std::logic_error("array is not sorted");
}
#endif
}
}; };
template <typename T> template <typename T>
class MovingMedianStack class MovingMedianStack
{ {
public: public:
MovingMedianStack(int nfilters, int size, float percentile = 50.f) : MovingMedianStack(int nfilters, int size) :
m_stack(nfilters, { size, percentile }) m_stack(nfilters, { size })
{ {
} }
@@ -260,12 +340,6 @@ public:
return m_stack[0].getSize(); return m_stack[0].getSize();
} }
void setPercentile(float p) {
for (auto &f: m_stack) {
f.setPercentile(p);
}
}
void push(int filter, T value) { void push(int filter, T value) {
m_stack[filter].push(value); m_stack[filter].push(value);
} }

View File

@@ -34,13 +34,6 @@ namespace tt = boost::test_tools;
BOOST_AUTO_TEST_SUITE(TestSignalBits) BOOST_AUTO_TEST_SUITE(TestSignalBits)
// NB our moving median has different lag behaviour from bsq - we
// begin padded with zeros, while bsq begins with an empty vector. The
// bsq behaviour is imho more correct, and this really shows up in the
// n_1 case below (where the correct answer is surely {1.0} rather
// than {0.0}) but ours is not wholly wrong, more efficient, "usually
// fine"
BOOST_AUTO_TEST_CASE(moving_median_simple_3) BOOST_AUTO_TEST_CASE(moving_median_simple_3)
{ {
MovingMedian<double> mm(3); MovingMedian<double> mm(3);
@@ -54,7 +47,7 @@ BOOST_AUTO_TEST_CASE(moving_median_simple_4)
{ {
MovingMedian<double> mm(4); MovingMedian<double> mm(4);
vector<double> arr { 1.0, 2.0, 3.0, 4.0 }; vector<double> arr { 1.0, 2.0, 3.0, 4.0 };
vector<double> expected { 2.0, 3.0, 3.0, 3.0 }; vector<double> expected { 2.0, 2.0, 3.0, 3.0 };
MovingMedian<double>::filter(mm, arr); MovingMedian<double>::filter(mm, arr);
BOOST_TEST(arr == expected, tt::per_element()); BOOST_TEST(arr == expected, tt::per_element());
} }
@@ -72,7 +65,7 @@ BOOST_AUTO_TEST_CASE(moving_median_simple_5_4)
{ {
MovingMedian<double> mm(5); MovingMedian<double> mm(5);
vector<double> arr { 1.2, 0.6, 1.0e-6, 1.0 }; vector<double> arr { 1.2, 0.6, 1.0e-6, 1.0 };
vector<double> expected { 1.0e-6, 0.6, 0.6, 1.0e-6 }; vector<double> expected { 0.6, 0.6, 0.6, 0.6 };
MovingMedian<double>::filter(mm, arr); MovingMedian<double>::filter(mm, arr);
BOOST_TEST(arr == expected, tt::per_element()); BOOST_TEST(arr == expected, tt::per_element());
} }
@@ -90,7 +83,7 @@ BOOST_AUTO_TEST_CASE(moving_median_n_1)
{ {
MovingMedian<double> mm(6); MovingMedian<double> mm(6);
vector<double> arr { 1.0 }; vector<double> arr { 1.0 };
vector<double> expected { 0.0 }; vector<double> expected { 1.0 };
MovingMedian<double>::filter(mm, arr); MovingMedian<double>::filter(mm, arr);
BOOST_TEST(arr == expected, tt::per_element()); BOOST_TEST(arr == expected, tt::per_element());
} }