Skip to content

Commit

Permalink
Fix issue: guard Welford dequeue against dividing by a zero count.
Browse files Browse the repository at this point in the history
  • Loading branch information
ncruces committed Jan 19, 2025
1 parent 1677b97 commit f12b411
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 13 deletions.
3 changes: 3 additions & 0 deletions ext/stats/percentile.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import (
"github.com/ncruces/sort/quick"
)

// Compatible with:
// https://sqlite.org/src/file/ext/misc/percentile.c

const (
median = iota
percentile_100
Expand Down
34 changes: 21 additions & 13 deletions ext/stats/welford.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,10 @@ import (
)

// Welford's algorithm with Kahan summation:
// The effect of truncation in statistical computation [van Reeken, AJ 1970]
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
// https://en.wikipedia.org/wiki/Kahan_summation_algorithm

// See also:
// https://duckdb.org/docs/sql/aggregates.html#statistical-aggregates

type welford struct {
m1, m2 kahan
n int64
Expand All @@ -39,17 +37,22 @@ func (w welford) stddev_samp() float64 {
}

// enqueue folds the sample x into the running statistics using
// Welford's online update.
//
// The count is bumped exactly once and kept in a local so that the same
// value is used both for the stored count and for the mean update.
// m1 accumulates the running mean and m2 the sum of products of the
// deviations taken before and after the mean update; both are kahan
// accumulators whose hi and lo parts are subtracted together to read the
// current value.
func (w *welford) enqueue(x float64) {
	n := w.n + 1
	w.n = n
	// Deviation of x from the mean before this sample is included.
	d1 := x - w.m1.hi - w.m1.lo
	w.m1.add(d1 / float64(n))
	// Deviation of x from the mean after this sample is included.
	d2 := x - w.m1.hi - w.m1.lo
	w.m2.add(d1 * d2)
}

// dequeue removes the sample x from the running statistics; it is the
// inverse of enqueue.
//
// The count is decremented exactly once. When the new count is zero or
// negative the accumulators are left untouched and the method returns
// early: continuing would divide by a zero count below.
func (w *welford) dequeue(x float64) {
	n := w.n - 1
	w.n = n
	if n <= 0 {
		return
	}
	// Deviation of x from the mean while x is still included.
	d1 := x - w.m1.hi - w.m1.lo
	w.m1.sub(d1 / float64(n))
	// Deviation of x from the mean once x has been removed.
	d2 := x - w.m1.hi - w.m1.lo
	w.m2.sub(d1 * d2)
}
Expand Down Expand Up @@ -139,11 +142,12 @@ func (w welford2) regr_json() string {
}

func (w *welford2) enqueue(y, x float64) {
w.n++
n := w.n + 1
w.n = n
d1y := y - w.m1y.hi - w.m1y.lo
d1x := x - w.m1x.hi - w.m1x.lo
w.m1y.add(d1y / float64(w.n))
w.m1x.add(d1x / float64(w.n))
w.m1y.add(d1y / float64(n))
w.m1x.add(d1x / float64(n))
d2y := y - w.m1y.hi - w.m1y.lo
d2x := x - w.m1x.hi - w.m1x.lo
w.m2y.add(d1y * d2y)
Expand All @@ -152,11 +156,15 @@ func (w *welford2) enqueue(y, x float64) {
}

func (w *welford2) dequeue(y, x float64) {
w.n--
n := w.n - 1
w.n = n
if n <= 0 {
return
}
d1y := y - w.m1y.hi - w.m1y.lo
d1x := x - w.m1x.hi - w.m1x.lo
w.m1y.sub(d1y / float64(w.n))
w.m1x.sub(d1x / float64(w.n))
w.m1y.sub(d1y / float64(n))
w.m1x.sub(d1x / float64(n))
d2y := y - w.m1y.hi - w.m1y.lo
d2x := x - w.m1x.hi - w.m1x.lo
w.m2y.sub(d1y * d2y)
Expand Down
22 changes: 22 additions & 0 deletions ext/stats/welford_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,16 @@ func Test_welford(t *testing.T) {
if s1.var_pop() != s2.var_pop() {
t.Errorf("got %v, want %v", s1, s2)
}

s1.dequeue(16)
s1.dequeue(7)
s1.dequeue(13)
s1.enqueue(16)
s1.enqueue(7)
s1.enqueue(13)
if s1.var_pop() != s2.var_pop() {
t.Errorf("got %v, want %v", s1, s2)
}
}

func Test_covar(t *testing.T) {
Expand Down Expand Up @@ -65,6 +75,18 @@ func Test_covar(t *testing.T) {
if c1.covar_pop() != c2.covar_pop() {
t.Errorf("got %v, want %v", c1.covar_pop(), c2.covar_pop())
}

c1.dequeue(2, 60)
c1.dequeue(5, 80)
c1.dequeue(4, 75)
c1.dequeue(7, 90)
c1.enqueue(2, 60)
c1.enqueue(5, 80)
c1.enqueue(4, 75)
c1.enqueue(7, 90)
if c1.covar_pop() != c2.covar_pop() {
t.Errorf("got %v, want %v", c1.covar_pop(), c2.covar_pop())
}
}

func Test_correlation(t *testing.T) {
Expand Down

0 comments on commit f12b411

Please sign in to comment.