-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathstat_bstrp_pvalue.py
69 lines (55 loc) · 2.17 KB
/
stat_bstrp_pvalue.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
Calculate global and cluster-level p-values using bootstrap sampling.
Author: Chao Huang (chaohuang.stat@gmail.com)
Last update: 2017-08-14
"""
import numpy as np
from scipy import stats
from stat_lpks import lpks
from stat_lpks_pre_bw import lpks_pre_bw
from stat_sif import sif
from stat_grs import grs
from stat_wald_ht import wald_ht
# from sklearn.cluster import KMeans
# from stat_gap import gap
"""
installed all the libraries above
"""
def bstrp_pvalue(coord_mat, x_design, y_design, cdesign, gstat, num_bstrp, thres, area):
    """
    Compute global and cluster-level p-values by bootstrap sampling under the null.

    The null model is fitted with ``lpks``/``sif`` on the columns of ``x_design``
    whose entry in ``cdesign`` is zero. Each bootstrap replicate draws a simulated
    response with ``grs``, refits it with ``lpks_pre_bw`` using the null bandwidth,
    and evaluates the global and local statistics with ``wald_ht``.

    Args:
        coord_mat (matrix): common coordinate matrix (l*d)
        x_design (matrix): design matrix (n*p)
        y_design (matrix): shape data (response matrix, n*l*m, m=d in MFSDA)
        cdesign (matrix): linear constraint matrix (row vector or 1-D array);
            its zero entries mark the columns of x_design kept in the null model
        gstat (scalar): global test statistic
        num_bstrp (scalar): number of bootstrap
        thres (scalar): thresholding for clustering; local p-values are compared
            against 10**(-thres)
        area (scalar): area of the largest connected region

    Returns:
        gpval: upper-tail probability of ``gstat`` under a shifted and scaled
            chi-square distribution whose mean, variance and third central moment
            match those of the bootstrap global statistics. It is not meaningful
            when the bootstrap third central moment is zero, because the
            approximation divides by it.
        clu_pval: fraction of bootstrap replicates in which the number of local
            p-values at or below 10**(-thres) is greater than or equal to ``area``.
    """
    # under the null hypothesis
    # np.nonzero(...)[0] on a 2-D row vector yields row indices (all zero), so
    # flatten first to get the positions of the unconstrained columns; a 1-D
    # cdesign gives exactly the same indices as before.
    null_cols = np.flatnonzero(np.asarray(cdesign) == 0)
    x_design0 = x_design[:, null_cols]
    _, efity_design0, h_opt0 = lpks(coord_mat, x_design0, y_design)
    resy_design0 = y_design - efity_design0
    efit_eta0, res_eta0, esig_eta0 = sif(coord_mat, resy_design0, h_opt0)

    # Bootstrap procedures
    gstatvec = np.zeros(num_bstrp)
    simlpval_area = np.zeros(num_bstrp)
    # Float base: NumPy integers cannot be raised to a negative integer power.
    lpval_cutoff = 10.0 ** (-thres)
    for gii in range(num_bstrp):
        simy_design = grs(efity_design0, efit_eta0, res_eta0)
        simefit_beta = lpks_pre_bw(coord_mat, x_design, simy_design, h_opt0)[0]
        sim_gstat, sim_lstat = wald_ht(x_design, simefit_beta, esig_eta0, cdesign)
        gstatvec[gii] = sim_gstat
        # sf() is the upper tail computed directly; 1 - cdf() rounds to zero
        # for very small p-values, which matters when thres is large.
        sim_lpval = stats.chi2.sf(sim_lstat, simy_design.shape[2])
        simlpval_area[gii] = np.sum(sim_lpval <= lpval_cutoff)

    # Three-moment chi-square approximation: a * chi2(d) + b has mean k1,
    # variance k2 (population variance, ddof=0) and third central moment k3.
    k1 = np.mean(gstatvec)
    k2 = np.var(gstatvec)
    k3 = np.mean((gstatvec - k1) ** 3)
    a = k3 / (4 * k2)
    b = k1 - 2 * k2 ** 2 / k3
    d = 8 * k2 ** 3 / (k3 ** 2)
    gpval = stats.chi2.sf((gstat - b) / a, d)

    # float() keeps this a true division even under Python 2 integer semantics.
    clu_pval = np.sum(simlpval_area >= area) / float(num_bstrp)
    return gpval, clu_pval