Now update MovingMedian behaviour to match bsq code (i.e. make it "more" correct)

This commit is contained in:
Chris Cannam
2022-06-10 16:39:32 +01:00
parent 0bfa94a76a
commit 5dcc499cf9
3 changed files with 150 additions and 71 deletions

View File

@@ -44,8 +44,8 @@ class HistogramFilter
public:
HistogramFilter(int nValues, int filterLength) :
m_buffer(filterLength),
m_histogram(nValues, 0) {
}
m_histogram(nValues, 0),
m_mode(-1) { }
~HistogramFilter() { }
int getFilterLength() const {
@@ -65,13 +65,21 @@ public:
--m_histogram[toDrop];
}
m_buffer.writeOne(value);
++m_histogram[value];
int height = ++m_histogram[value];
if (m_mode >= 0 && height >= m_histogram[m_mode]) {
if (height > m_histogram[m_mode] || value < m_mode) {
m_mode = value;
}
}
}
void drop() {
if (m_buffer.getReadSpace() > 0) {
int toDrop = m_buffer.readOne();
--m_histogram[toDrop];
if (toDrop == m_mode) {
m_mode = -1;
}
}
}
@@ -95,6 +103,9 @@ public:
* an equal number of times, return the smallest of them.
*/
int getMode() const {
if (m_mode >= 0) {
return m_mode;
}
int max = 0;
int mode = 0;
for (int i = 0; i < m_histogram.size(); ++i) {
@@ -104,6 +115,7 @@ public:
mode = i;
}
}
m_mode = mode;
return mode;
}
@@ -168,7 +180,7 @@ public:
private:
SingleThreadRingBuffer<int> m_buffer;
std::vector<int> m_histogram;
int m_mode;
mutable int m_mode;
};
}

View File

@@ -37,19 +37,27 @@
namespace RubberBand
{
/**
* A median or modal filter implemented using sorting, for continuous
* values. Pushing a value updates the sorted record, after which you
* can get() the median. You can call drop() to drop the oldest value
* without pushing a new one, for example to drain the filter at the
* tail of the sequence.
*/
template <typename T>
class MovingMedian : public SampleFilter<T>
{
public:
MovingMedian(int filterLength, float percentile = 50.f) :
m_buffer(filterLength),
m_sortspace(filterLength, {})
{
setPercentile(percentile);
}
static constexpr float fifty = 50.f;
~MovingMedian() {
}
public:
MovingMedian(int filterLength, float percentile = fifty) :
m_buffer(filterLength),
m_sortspace(filterLength, {}),
m_fill(0),
m_percentile(percentile)
{ }
~MovingMedian() { }
MovingMedian(const MovingMedian &) =default;
MovingMedian &operator=(const MovingMedian &) =default;
@@ -59,10 +67,7 @@ public:
}
void setPercentile(float p) {
int length = getSize();
m_index = int((length * p) / 100.f);
if (m_index >= length) m_index = length-1;
if (m_index < 0) m_index = 0;
m_percentile = p;
}
void push(T value) {
@@ -70,18 +75,34 @@ public:
std::cerr << "WARNING: MovingMedian: NaN encountered" << std::endl;
value = T();
}
if (m_buffer.getWriteSpace() == 0) {
if (m_fill == getSize()) {
T toDrop = m_buffer.readOne();
dropAndPut(toDrop, value);
m_buffer.writeOne(value);
} else {
put(value);
m_buffer.writeOne(value);
}
m_buffer.writeOne(value);
}
void drop() {
if (m_fill > 0) {
T toDrop = m_buffer.readOne();
drop(toDrop);
}
}
/** Return the median of the values in the filter currently. If
* the median lies between two values, return the first of them.
*/
T get() const {
return m_sortspace[m_index];
int n = m_fill - 1;
if (m_percentile == fifty) { // exact default value
return m_sortspace[n / 2];
} else {
int index = float(n) * m_percentile / 100.f;
if (index >= m_fill) index = m_fill - 1;
return m_sortspace[index];
}
}
void reset() {
@@ -93,26 +114,22 @@ public:
// in-place. Array has length n. Modifies both the filter and the
// array.
//
static void filter(MovingMedian<T> &mm, T *v, int n) {
int fn = mm.getSize();
int lag = fn / 2;
mm.reset();
int i = 0;
for (; i < lag; ++i) {
if (i < n) mm.push(v[i]);
}
for (; i < n; ++i) {
mm.push(v[i]);
v[i-lag] = mm.get();
}
for (; i < lag; ++i) {
// just for the unusual case where lag > n
mm.push(T());
(void)mm.get();
}
for (; i < n + lag; ++i) {
mm.push(T());
v[i-lag] = mm.get();
static void filter(MovingMedian<T> &f, T *v, int n) {
f.reset();
int flen = f.getSize();
int i = -(flen / 2);
int j = 0;
while (i != n) {
if (j < n) {
f.push(v[j]);
} else if (j >= flen) {
f.drop();
}
if (i >= 0) {
v[i] = f.get();
}
++i;
++j;
}
}
@@ -125,18 +142,26 @@ public:
private:
SingleThreadRingBuffer<T> m_buffer;
std::vector<T> m_sortspace;
int m_index;
int m_fill;
float m_percentile;
void dropAndPut(const T &toDrop, const T &toPut) {
// precondition: sorted contains m_length values, one of which is toDrop
// postcondition: sorted contains m_length values, one of which is toPut
// precondition: sorted contains getSize values, one of which is toDrop
// postcondition: sorted contains getSize values, one of which is toPut
// (and one instance of toDrop has been removed)
// This implementation was timed for rather short filters (no
// longer than maybe 16 items). Two binary searches plus a
// memmove should be faster for longer ones.
const int n = getSize();
const int n = m_fill;
#ifdef DEBUG_MM
if (n != getSize()) {
throw std::logic_error("length mismatch");
}
#endif
T *sorted = m_sortspace.data();
int dropIx;
if (toDrop <= *sorted) {
@@ -158,7 +183,7 @@ private:
std::cout << "toDrop = " << toDrop << ", dropIx = " << dropIx << std::endl;
std::cout << "toPut = " << toPut << std::endl;
if (sorted[dropIx] != toDrop) {
throw std::logic_error("element not found");
throw std::logic_error("element not found (a)");
}
#endif
@@ -198,14 +223,15 @@ private:
}
void put(const T &toPut) {
// precondition: sorted contains fewer than m_length values,
// precondition: sorted contains m_fill values, m_fill < m_length,
// packed at the start
// postcondition: sorted contains up to m_length values,
// postcondition: m_fill is incremented, sorted contains m_fill values,
// packed at the start, one of which is toPut
const int n = m_buffer.getReadSpace(); // items in sorted
int n = m_fill;
#ifdef DEBUG_MM
if (n >= m_length) {
if (n >= getSize()) {
throw std::logic_error("length mismatch");
}
#endif
@@ -229,27 +255,81 @@ private:
}
sorted[putIx] = toPut;
++m_fill;
#ifdef DEBUG_MM
std::cout << "after: [";
for (int i = 0; i < n + 1; ++i) {
for (int i = 0; i < m_fill; ++i) {
if (i > 0) std::cout << ",";
std::cout << sorted[i];
}
std::cout << "]" << std::endl;
if (!std::is_sorted(sorted, sorted + n)) {
if (!std::is_sorted(sorted, sorted + m_fill)) {
throw std::logic_error("array is not sorted");
}
#endif
}
void drop(const T &toDrop) {
// precondition: sorted contains m_fill values, m_fill > 0, <= m_length,
// packed at the start, one of which is toDrop
// postcondition: m_fill decremented, sorted contains m_fill values,
// packed at the start, none of which is toDrop
int n = m_fill;
#ifdef DEBUG_MM
if (n <= 0 || n > getSize()) {
throw std::logic_error("length mismatch");
}
#endif
T *sorted = m_sortspace.data();
int dropIx = std::lower_bound(sorted, sorted + n, toDrop) - sorted;
#ifdef DEBUG_MM
std::cout << "\nbefore: [";
for (int i = 0; i < n; ++i) {
if (i > 0) std::cout << ",";
std::cout << sorted[i];
}
std::cout << "]" << std::endl;
std::cout << "toDrop = " << toDrop << ", dropIx = " << dropIx << std::endl;
if (sorted[dropIx] != toDrop) {
throw std::logic_error("element not found (b)");
}
#endif
if (dropIx < n - 1) {
v_move(sorted + dropIx, sorted + dropIx + 1, n - dropIx - 1);
}
--m_fill;
#ifdef DEBUG_MM
std::cout << "after: [";
for (int i = 0; i < m_fill; ++i) {
if (i > 0) std::cout << ",";
std::cout << sorted[i];
}
std::cout << "]" << std::endl;
if (!std::is_sorted(sorted, sorted + m_fill)) {
throw std::logic_error("array is not sorted");
}
#endif
}
};
template <typename T>
class MovingMedianStack
{
public:
MovingMedianStack(int nfilters, int size, float percentile = 50.f) :
m_stack(nfilters, { size, percentile })
MovingMedianStack(int nfilters, int size) :
m_stack(nfilters, { size })
{
}
@@ -260,12 +340,6 @@ public:
return m_stack[0].getSize();
}
void setPercentile(float p) {
for (auto &f: m_stack) {
f.setPercentile(p);
}
}
void push(int filter, T value) {
m_stack[filter].push(value);
}

View File

@@ -34,13 +34,6 @@ namespace tt = boost::test_tools;
BOOST_AUTO_TEST_SUITE(TestSignalBits)
// NB our moving median has different lag behaviour from bsq - we
// begin padded with zeros, while bsq begins with an empty vector. The
// bsq behaviour is imho more correct, and this really shows up in the
// n_1 case below (where the correct answer is surely {1.0} rather
// than {0.0}) but ours is not wholly wrong, more efficient, "usually
// fine"
BOOST_AUTO_TEST_CASE(moving_median_simple_3)
{
MovingMedian<double> mm(3);
@@ -54,7 +47,7 @@ BOOST_AUTO_TEST_CASE(moving_median_simple_4)
{
MovingMedian<double> mm(4);
vector<double> arr { 1.0, 2.0, 3.0, 4.0 };
vector<double> expected { 2.0, 3.0, 3.0, 3.0 };
vector<double> expected { 2.0, 2.0, 3.0, 3.0 };
MovingMedian<double>::filter(mm, arr);
BOOST_TEST(arr == expected, tt::per_element());
}
@@ -72,7 +65,7 @@ BOOST_AUTO_TEST_CASE(moving_median_simple_5_4)
{
MovingMedian<double> mm(5);
vector<double> arr { 1.2, 0.6, 1.0e-6, 1.0 };
vector<double> expected { 1.0e-6, 0.6, 0.6, 1.0e-6 };
vector<double> expected { 0.6, 0.6, 0.6, 0.6 };
MovingMedian<double>::filter(mm, arr);
BOOST_TEST(arr == expected, tt::per_element());
}
@@ -90,7 +83,7 @@ BOOST_AUTO_TEST_CASE(moving_median_n_1)
{
MovingMedian<double> mm(6);
vector<double> arr { 1.0 };
vector<double> expected { 0.0 };
vector<double> expected { 1.0 };
MovingMedian<double>::filter(mm, arr);
BOOST_TEST(arr == expected, tt::per_element());
}