Re: implementation note for scapegoat tree



"user923005" <dcorbit@xxxxxxxxx> writes:

Speaking of PSPP, here is an *extremely* robust way to calculate
standard deviation (due to Knuth of course!):

[...]

template <class etype>
void WelfordStandardDeviation::AddItem(etype x)
{
++n;
if (n == 1) {
m[0] = x;
} else {
m[1] = m[0] + (x - m[0]) / n;
s[1] = s[0] + (x - m[0]) * (x - m[1]);
m[0] = m[1]; /* for next time */
s[0] = s[1]; /* for next time */
}
}

I think that this core algorithm is the same one that PSPP uses
for one-pass calculation of moments. The core code in PSPP is:

/* Adds VALUE with the given WEIGHT to the calculation of
one-pass moments. */
void
moments1_add (struct moments1 *m, double value, double weight)
{
assert (m != NULL);

if (value != SYSMIS && weight > 0.)
{
double prev_w, v1;

prev_w = m->w;
m->w += weight;
v1 = (weight / m->w) * (value - m->d1);
m->d1 += v1;

if (m->max_moment >= MOMENT_VARIANCE)
{
double v2 = v1 * v1;
double w_prev_w = m->w * prev_w;
double prev_m2 = m->d2;

m->d2 += w_prev_w / weight * v2;
if (m->max_moment >= MOMENT_SKEWNESS)
{
double w2 = weight * weight;
double v3 = v2 * v1;
double prev_m3 = m->d3;

m->d3 += (-3. * v1 * prev_m2
+ w_prev_w / w2 * (m->w - 2. * weight) * v3);
if (m->max_moment >= MOMENT_KURTOSIS)
{
double w3 = w2 * weight;
double v4 = v2 * v2;

m->d4 += (-4. * v1 * prev_m3
+ 6. * v2 * prev_m2
+ ((pow2 (m->w) - 3. * weight * prev_w)
* v4 * w_prev_w / w3));
}
}
}
}
}

PSPP also has a set of routines for two-pass calculation of
moments.
--
Ben Pfaff
blp@xxxxxxx
.