Skip to content

Commit

Permalink
Update skew and kurtosis - return NaN when duplicated
Browse files Browse the repository at this point in the history
  • Loading branch information
milesgranger committed Sep 12, 2023
1 parent 69958e4 commit 47fbdcd
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 9 deletions.
8 changes: 7 additions & 1 deletion crick/stats.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ cdef extern from "stats_stubs.c":
np.float64_t m2
np.float64_t m3
np.float64_t m4
bint homogeneous
np.float64_t first_value


stats_t *stats_new()
void stats_free(stats_t *T)
Expand Down Expand Up @@ -73,7 +76,8 @@ cdef class SummaryStats:

def __getstate__(self):
return (self.stats.count, self.stats.sum, self.stats.min,
self.stats.max, self.stats.m2, self.stats.m3, self.stats.m4)
self.stats.max, self.stats.m2, self.stats.m3, self.stats.m4,
self.stats.homogeneous, self.stats.first_value)

def __setstate__(self, state):
self.stats.count = state[0]
Expand All @@ -83,6 +87,8 @@ cdef class SummaryStats:
self.stats.m2 = state[4]
self.stats.m3 = state[5]
self.stats.m4 = state[6]
self.stats.homogeneous = state[7]
self.stats.first_value = state[8]

def add(self, double x, int count=1):
"""add(self, x)
Expand Down
28 changes: 22 additions & 6 deletions crick/stats_stubs.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <stdbool.h>
#include <stdlib.h>
#include <float.h>

Expand All @@ -16,6 +17,8 @@ typedef struct {
npy_float64 m2;
npy_float64 m3;
npy_float64 m4;
bool homogeneous;
npy_float64 first_value;
} stats_t;


Expand All @@ -30,6 +33,8 @@ CRICK_INLINE stats_t *stats_new() {
T->m2 = 0;
T->m3 = 0;
T->m4 = 0;
T->homogeneous = true;
T->first_value = 0.;
return T;
}

Expand Down Expand Up @@ -72,6 +77,14 @@ CRICK_INLINE void stats_do_update(stats_t *T, npy_int64 n2, npy_float64 sum2,

/*
 * Merge the summary held in T2 into T1. T2 is only read here.
 *
 * In addition to folding T2's aggregates into T1 through stats_do_update,
 * this keeps T1's `homogeneous` / `first_value` pair consistent so that
 * stats_skew and stats_kurt can return NaN when every value is identical.
 *
 * Merging an empty T2 is a no-op.
 */
CRICK_INLINE void stats_merge(stats_t *T1, stats_t *T2) {
    if (T2->count == 0) return;

    if (T1->count == 0) {
        // T1 has seen no data yet, so its first_value is still the 0.
        // placeholder written by stats_new. Comparing against it would
        // wrongly mark a constant, non-zero T2 as non-homogeneous, so adopt
        // T2's state wholesale instead.
        T1->homogeneous = T2->homogeneous;
        T1->first_value = T2->first_value;
    } else if (T1->homogeneous) {
        // T1 stays homogeneous only if T2 is homogeneous too and both sides
        // hold the same repeated value. Once false, the flag never flips back.
        T1->homogeneous = T2->homogeneous
                          && T1->first_value == T2->first_value;
    }

    // NOTE(review): the moments are passed as m4, m3, m2; confirm this
    // matches the parameter order of stats_do_update, whose full signature
    // is not visible in this hunk.
    stats_do_update(T1, T2->count, T2->sum, T2->min, T2->max,
                    T2->m4, T2->m3, T2->m2);
}
Expand Down Expand Up @@ -99,8 +112,7 @@ CRICK_INLINE double stats_std(stats_t *T, long ddof) {

CRICK_INLINE double stats_skew(stats_t *T, int bias) {
double n, m2, m3, skew;
// XXX: this should check that T->nunique > 1
if (T->count < 2) return NPY_NAN;
if (T->count < 2 || T->homogeneous) return NPY_NAN;
n = T->count;
m2 = T->m2 / T->count;
m3 = T->m3 / T->count;
Expand All @@ -113,8 +125,7 @@ CRICK_INLINE double stats_skew(stats_t *T, int bias) {

CRICK_INLINE double stats_kurt(stats_t *T, int fisher, int bias) {
double n, m2, m4, kurt;
// XXX: this should check that T->nunique > 1
if (T->count < 2) return NPY_NAN;
if (T->count < 2 || T->homogeneous) return NPY_NAN;
n = T->count;
m2 = T->m2 / T->count;
m4 = T->m4 / T->count;
Expand Down Expand Up @@ -186,8 +197,13 @@ CRICK_INLINE npy_intp stats_update_ndarray(stats_t *T, PyArrayObject *x,
npy_intp count = *innersizeptr;

while (count--) {
stats_add(T, *(npy_float64 *)data_x,
*(npy_int64 *)data_w);
npy_float64 value = *(npy_float64 *)data_x;
if (T->count == 0) {
T->first_value = value;
} else if (T->homogeneous && T->first_value != value) {
T->homogeneous = false;
}
stats_add(T, value, *(npy_int64 *)data_w);

data_x += stride_x;
data_w += stride_w;
Expand Down
4 changes: 2 additions & 2 deletions crick/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_basic_stats(x):
normal,
empty,
one,
pytest.param(duplicate, marks=scipy_xfail_mark),
pytest.param(duplicate),
different,
],
)
Expand All @@ -69,7 +69,7 @@ def test_skew(x, bias):
normal,
empty,
one,
pytest.param(duplicate, marks=scipy_xfail_mark),
pytest.param(duplicate),
different,
],
)
Expand Down

0 comments on commit 47fbdcd

Please sign in to comment.