-
-
Notifications
You must be signed in to change notification settings - Fork 7.3k
/
Copy pathwindowed_median.cpp
236 lines (218 loc) · 9.19 KB
/
windowed_median.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
/**
* @file
* @brief An implementation of a median calculation of a sliding window along a
* data stream
*
* @details
* Given a stream of integers, the algorithm calculates the median of a fixed
* size window at the back of the stream. The leading time complexity of this
* algorithm is O(log(N)), and it is inspired by the known algorithm to [find
* median from (infinite) data
* stream](https://www.tutorialcup.com/interview/algorithm/find-median-from-data-stream.htm),
* with the proper modifications to account for the finite window size for which
* the median is requested
*
* ### Algorithm
* The sliding window is managed by a list, which guarantees O(1) for both
* pushing and popping. Each new value is pushed to the window back, while a
* value from the front of the window is popped. In addition, the algorithm
* manages a multi-value binary search tree (BST), implemented by std::multiset.
* For each new value that is inserted into the window, it is also inserted to
* the BST. When a value is popped from the window, it is also erased from the
* BST. Both insertion and erasure to/from the BST are O(logN) in time, with N
* the size of the window. Finally, the algorithm keeps a pointer to the root of
* the BST, and updates its position whenever values are inserted or erased
* to/from BST. The root of the tree is the median! Hence, median retrieval is
* always O(1)
*
* Time complexity: O(logN). Space complexity: O(N). N - size of window
* @author [Yaniv Hollander](https://github.com/YanivHollander)
*/
#include <cassert>   /// for assert
#include <cstdlib>   /// for std::rand - needed in testing
#include <ctime>     /// for std::time - needed in testing
#include <iterator>  /// for std::next, std::prev, std::distance
#include <list>      /// for std::list - used to manage sliding window
#include <set>       /// for std::multiset - used to manage multi-value sorted sliding window values
#include <vector>    /// for std::vector - needed in testing
/**
 * @namespace probability
 * @brief Probability algorithms
 */
namespace probability {
/**
 * @namespace windowed_median
 * @brief Functions for the Windowed Median algorithm implementation
 */
namespace windowed_median {
using Window = std::list<int>;
using size_type = Window::size_type;

/**
 * @class WindowedMedian
 * @brief A class to calculate the median of a leading sliding window at the
 * back of a stream of integer values.
 *
 * @details
 * The window itself is kept in arrival order in a std::list, and a sorted
 * copy of the same values is kept in a std::multiset. An iterator into the
 * multiset tracks the median element: for an odd number of stored values it
 * points at the middle element, for an even number it points at the lower of
 * the two middle elements. The iterator is moved by at most one step on every
 * insertion/erasure, so reading the median never requires a traversal.
 */
class WindowedMedian {
    const size_type _windowSize;       ///< sliding window size
    Window _window;                    ///< window values in arrival order
    std::multiset<int> _sortedValues;  ///< the same values, kept sorted
    std::multiset<int>::const_iterator
        _itMedian;  ///< median (odd count) or lower-middle (even count)
                    ///< element of _sortedValues; end() while it is empty

    /**
     * @brief Inserts a value into the sorted container and re-seats the
     * median iterator
     * @param value Value to insert
     */
    void insertToSorted(int value) {
        // std::multiset places a value after any existing equal values, so a
        // value equal to the median always lands to the right of it.
        _sortedValues.insert(value);
        const auto sz = _sortedValues.size();
        if (sz == 1) {  // first value: it is the median
            _itMedian = _sortedValues.cbegin();
            return;
        }
        const bool evenNow = (sz % 2 == 0);
        if (evenNow && value < *_itMedian) {
            // New element landed left of the median and the count became
            // even: the lower-middle element is one step to the left.
            --_itMedian;
        } else if (!evenNow && value >= *_itMedian) {
            // New element landed right of the lower-middle element and the
            // count became odd: the middle element is one step to the right.
            ++_itMedian;
        }
    }

    /**
     * @brief Erases one occurrence of a value from the sorted container and
     * re-seats the median iterator
     * @details If the value is not stored, nothing happens.
     * @param value Value to erase
     */
    void eraseFromSorted(int value) {
        // lower_bound deterministically yields the FIRST occurrence
        // (multiset::find may return any equivalent element).
        std::multiset<int>::const_iterator victim =
            _sortedValues.lower_bound(value);
        if (victim == _sortedValues.cend() || *victim != value) {
            return;
        }
        const auto sz = _sortedValues.size();
        if (sz == 1) {
            // Removing the only element: there is no median any more, and
            // stepping the iterator left of begin() would be invalid.
            _sortedValues.erase(victim);
            _itMedian = _sortedValues.cend();
            return;
        }
        if (sz % 2 == 0) {
            // Even count: the iterator is on the lower-middle element. The
            // first occurrence of a value <= median is at or left of it, so
            // the new middle is one step to the right (never the victim).
            if (value <= *_itMedian) {
                ++_itMedian;
            }
        } else if (value >= *_itMedian) {
            // Odd count: the new lower-middle element is the left neighbour
            // of the current median. When the value equals the median, erase
            // the median node itself; erasing an equal element further left
            // would either invalidate the iterator or leave it one position
            // too far to the left.
            if (value == *_itMedian) {
                victim = _itMedian;
            }
            --_itMedian;
        }
        // Odd count and value < median: the victim is left of the median and
        // the iterator already designates the new lower-middle element.
        _sortedValues.erase(victim);
    }

 public:
    /**
     * @brief Constructs a WindowedMedian object
     * @param windowSize Sliding window size
     */
    explicit WindowedMedian(size_type windowSize)
        : _windowSize(windowSize), _itMedian(_sortedValues.cend()) {}

    /**
     * @brief Copy constructor
     * @details The median iterator must refer to this object's own sorted
     * container, so it is re-seated at the same offset instead of being
     * copied from `other`.
     * @param other Object to copy
     */
    WindowedMedian(const WindowedMedian &other)
        : _windowSize(other._windowSize),
          _window(other._window),
          _sortedValues(other._sortedValues),
          _itMedian(std::next(_sortedValues.cbegin(),
                              std::distance(other._sortedValues.cbegin(),
                                            other._itMedian))) {}

    /**
     * @brief Insert a new value to the stream
     * @param value New value to insert
     */
    void insert(int value) {
        _window.push_back(value);  // newest value goes to the window back
        insertToSorted(value);
        if (_window.size() > _windowSize) {
            // Window overflowed: drop the oldest value from both containers
            eraseFromSorted(_window.front());
            _window.pop_front();
        }
    }

    /**
     * @brief Gets the median of the values in the sliding window
     * @pre At least one value is stored in the window (asserted)
     * @return Median of sliding window. For an even number of stored values
     * return the average between the two values in the middle
     */
    float getMedian() const {
        assert(!_sortedValues.empty());
        if (_sortedValues.size() % 2 != 0) {
            return static_cast<float>(*_itMedian);
        }
        // Halve each operand before adding so the int sum cannot overflow
        return 0.5f * static_cast<float>(*_itMedian) +
               0.5f * static_cast<float>(*std::next(_itMedian));
    }

    /**
     * @brief A naive and inefficient method to obtain the median of the
     * sliding window. Used for testing!
     * @pre At least one value is stored in the window (asserted)
     * @return Median of sliding window. For an even number of stored values
     * return the average between the two values in the middle
     */
    float getMedianNaive() const {
        assert(!_window.empty());
        Window sorted = _window;
        sorted.sort();
        const auto upperMid = std::next(
            sorted.cbegin(),
            static_cast<Window::difference_type>(sorted.size() / 2));
        if (sorted.size() % 2 != 0) {
            return static_cast<float>(*upperMid);
        }
        return 0.5f * static_cast<float>(*upperMid) +
               0.5f * static_cast<float>(*std::prev(upperMid));
    }
};
}  // namespace windowed_median
}  // namespace probability
/**
* @brief Self-test implementations
* @param vals Stream of values
* @param windowSize Size of sliding window
*/
static void test(const std::vector<int> &vals, int windowSize) {
probability::windowed_median::WindowedMedian windowedMedian(windowSize);
for (const auto val : vals) {
windowedMedian.insert(val);
/// Comparing medians: efficient function vs. Naive one
assert(windowedMedian.getMedian() == windowedMedian.getMedianNaive());
}
}
/**
 * @brief Main function: runs a set of fixed self-tests followed by a batch of
 * randomized ones
 * @param argc command line argument count (ignored)
 * @param argv command line array of arguments (ignored)
 * @returns 0 on exit
 */
int main(int argc, const char *argv[]) {
    (void)argc;  // unused
    (void)argv;  // unused

    // A few fixed test cases
    test({1, 2, 3, 4, 5, 6, 7, 8, 9},
         3);  // Array of sorted values; odd window size
    test({9, 8, 7, 6, 5, 4, 3, 2, 1},
         3);  // Array of sorted values - decreasing; odd window size
    test({9, 8, 7, 6, 5, 4, 5, 6}, 4);     // Even window size
    test({3, 3, 3, 3, 3, 3, 3, 3, 3}, 3);  // Array with repeating values
    test({3, 3, 3, 3, 7, 3, 3, 3, 3}, 3);  // Same values except one
    test({4, 3, 3, -5, -5, 1, 3, 4, 5},
         5);  // Array that includes repeating values including negatives

    // Array with large values - sum of few pairs exceeds MAX_INT. Window size
    // is even - testing calculation of average median between two middle
    // values
    test({470211272, 101027544, 1457850878, 1458777923, 2007237709, 823564440,
          1115438165, 1784484492, 74243042, 114807987},
         6);

    // Random test cases
    std::srand(static_cast<unsigned int>(std::time(nullptr)));
    constexpr int kRandomRounds = 92;  // same number of rounds as before
    std::vector<int> vals;
    for (int round = 0; round < kRandomRounds; ++round) {
        const int n = 5 + std::rand() % 16;          // Array size in [5, 20]
        const int windowSize = 3 + std::rand() % 8;  // Window size in [3, 10]
        vals.clear();
        vals.reserve(static_cast<std::vector<int>::size_type>(n));
        for (int k = 0; k < n; ++k) {
            // Centre the range on zero so both signs actually occur
            vals.push_back(std::rand() - RAND_MAX / 2);
        }
        test(vals, windowSize);  // Testing randomized test
    }
    return 0;
}