Re: Sliding Window of events, and Standard Deviation of event duration.



On Mar 12, 2:23 pm, Daniel Pitts
<newsgroup.spamfil...@xxxxxxxxxxxxxxxxxxx> wrote:
I have a Set of Events.

New events can be added at any time, and old events can be removed at
any time. The number of events in the Set may be arbitrarily large.

It is easy to maintain simple aggregates about the events (number of
events by type, or mean duration), but I'm having a much harder time
trying to calculate standard deviation of any of these values without
traversing the entire Event set.

Assuming M is the number of events added/removed, and N is the number of
events total, is there any algorithm or approach I can take to reduce
the complexity from O(N+M) to O(M) or even O(log N + M) or something better?

Unfortunately, my solution seems to lose quite a few digits of
precision:


#include <cmath>

template < typename Number >
class WelfordStandardDeviation {
private:
Number m[2];
Number s[2];
size_t n;
public:
WelfordStandardDeviation(void);
Number GetS(void);
Number GetM(void);
Number GetN(void);
Number GetSigma(void);
void AddItem(Number x);
void RemoveItem(Number x);
};

template < typename Number >
WelfordStandardDeviation<Number>::WelfordStandardDeviation(void)
{
m[0] = (Number) 0;
m[1] = (Number) 0;
s[0] = (Number) 0;
s[1] = (Number) 0;
n = 0;
}

template < typename Number >
Number WelfordStandardDeviation<Number>::GetS(void)
{
if (n >= 2)
return s[0];
return sqrt(-1.); /* Create a NAN */
}

template < typename Number >
Number WelfordStandardDeviation<Number>::GetM(void)
{
if (n >= 2)
return s[0];
return sqrt(-1.); /* Create a NAN */
}

template < typename Number >
Number WelfordStandardDeviation<Number>::GetN(void)
{
return n;
}

template < typename Number >
Number WelfordStandardDeviation<Number>::GetSigma(void)
{
if (n >= 2)
return sqrt(s[0] / (n - 1.0));
return sqrt(-1.); /* Create a NAN */
}

template < typename Number >
void WelfordStandardDeviation<Number>::AddItem(Number x)
{
++n;
if (n == 1) {
m[0] = x;
} else {
m[1] = m[0] + (x - m[0]) / n;
s[1] = s[0] + (x - m[0]) * (x - m[1]);
m[0] = m[1]; /* for next time */
s[0] = s[1]; /* for next time */
}
}

template < typename Number >
void WelfordStandardDeviation<Number>::RemoveItem(Number x)
{
--n;
if (n == 1) {
m[0] = x;
} else {
m[1] = m[0] - (x - m[0]) / n;
s[1] = s[0] - (x - m[0]) * (x - m[1]);
m[0] = m[1]; /* for next time */
s[0] = s[1]; /* for next time */
}
}


#ifdef UNIT_TEST

#include <iostream>
#include <iomanip>
using namespace std;

#include "stats.hpp"
#include "welford.hpp"

int main(void)
{
WelfordStandardDeviation <double> wsd;
WelfordStandardDeviation <double> wsd2;
size_t i;

cout << setprecision(20);

for (i = 0; i < 10000; i++) {
wsd.AddItem((double) i);
}
cout << "Standard deviation of the first 10000 integers is " <<
wsd.GetSigma() << endl;

for (i = 0; i < 100000; i++) {
wsd2.AddItem((double) i);
}
cout << "Standard deviation of the first 100000 integers is " <<
wsd2.GetSigma() << endl;

for (i = 10000; i < 100000; i++) {
wsd2.RemoveItem((double) i);
}
cout << "Standard deviation of the first 10000 integers is " <<
wsd2.GetSigma() << endl;

return 0;
}

/*
C:\tmp>welford
Standard deviation of the first 10000 integers is 2886.8956799071675
Standard deviation of the first 100000 integers is 28867.657796687741
Standard deviation of the first 10000 integers is 2886.8956799694429
*/
#endif /* UNIT_TEST */


.



Relevant Pages