This repository has been archived by the owner on Jun 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 89
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of github.com:MicrosoftResearch/Azimuth
- Loading branch information
Showing
1 changed file
with
120 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
""" | ||
Functions for calculating the statistical significant differences between two dependent or independent correlation | ||
coefficients. | ||
The Fisher and Steiger method is adopted from the R package http://personality-project.org/r/html/paired.r.html | ||
and is described in detail in the book 'Statistical Methods for Psychology' | ||
The Zou method is adopted from http://seriousstats.wordpress.com/2012/02/05/comparing-correlations/ | ||
Credit goes to the authors of above mentioned packages! | ||
Author: Philipp Singer (www.philippsinger.info) | ||
------------------------------------------------------------ | ||
README.md: | ||
CorrelationStats | ||
This Python script enables you to compute statistical significance tests on both dependent and independent correlation coefficients. For each case two methods to choose from are available. | ||
For details, please refer to: http://www.philippsinger.info/?p=347 | ||
#copied from on 4/24/2015 from https://github.com/psinger/CorrelationStats/blob/master/corrstats.py | ||
""" | ||
|
||
|
||
from __future__ import division | ||
|
||
__author__ = 'psinger' | ||
|
||
import numpy as np | ||
from scipy.stats import t, norm | ||
from math import atanh, pow | ||
from numpy import tanh | ||
|
||
def rz_ci(r, n, conf_level = 0.95): | ||
zr_se = pow(1/(n - 3), .5) | ||
moe = norm.ppf(1 - (1 - conf_level)/float(2)) * zr_se | ||
zu = atanh(r) + moe | ||
zl = atanh(r) - moe | ||
return tanh((zl, zu)) | ||
|
||
def rho_rxy_rxz(rxy, rxz, ryz): | ||
num = (ryz-1/2.*rxy*rxz)*(1-pow(rxy,2)-pow(rxz,2)-pow(ryz,2))+pow(ryz,3) | ||
den = (1 - pow(rxy,2)) * (1 - pow(rxz,2)) | ||
return num/float(den) | ||
|
||
def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'): | ||
""" | ||
Calculates the statistic significance between two dependent correlation coefficients | ||
@param xy: correlation coefficient between x and y | ||
@param xz: correlation coefficient between x and z | ||
@param yz: correlation coefficient between y and z | ||
@param n: number of elements in x, y and z | ||
@param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method | ||
@param conf_level: confidence level, only works for 'zou' method | ||
@param method: defines the method uses, 'steiger' or 'zou' | ||
@return: t and p-val | ||
""" | ||
if method == 'steiger': | ||
d = xy - xz | ||
determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz | ||
av = (xy + xz)/2 | ||
cube = (1 - yz) * (1 - yz) * (1 - yz) | ||
|
||
t2 = d * np.sqrt((n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + av * av * cube))) | ||
p = 1 - t.cdf(abs(t2), n - 2) | ||
|
||
if twotailed: | ||
p *= 2 | ||
|
||
return t2, p | ||
elif method == 'zou': | ||
L1 = rz_ci(xy, n, conf_level=conf_level)[0] | ||
U1 = rz_ci(xy, n, conf_level=conf_level)[1] | ||
L2 = rz_ci(xz, n, conf_level=conf_level)[0] | ||
U2 = rz_ci(xz, n, conf_level=conf_level)[1] | ||
rho_r12_r13 = rho_rxy_rxz(xy, xz, yz) | ||
lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5) | ||
upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5) | ||
return lower, upper | ||
else: | ||
raise Exception('Wrong method!') | ||
|
||
def independent_corr(xy, ab, n, n2 = None, twotailed=True, conf_level=0.95, method='fisher'): | ||
""" | ||
Calculates the statistic significance between two independent correlation coefficients | ||
@param xy: correlation coefficient between x and y | ||
@param xz: correlation coefficient between a and b | ||
@param n: number of elements in xy | ||
@param n2: number of elements in ab (if distinct from n) | ||
@param twotailed: whether to calculate a one or two tailed test, only works for 'fisher' method | ||
@param conf_level: confidence level, only works for 'zou' method | ||
@param method: defines the method uses, 'fisher' or 'zou' | ||
@return: z and p-val | ||
""" | ||
|
||
if method == 'fisher': | ||
xy_z = 0.5 * np.log((1 + xy)/(1 - xy)) | ||
xz_z = 0.5 * np.log((1 + ab)/(1 - ab)) | ||
if n2 is None: | ||
n2 = n | ||
|
||
se_diff_r = np.sqrt(1/(n - 3) + 1/(n2 - 3)) | ||
diff = xy_z - xz_z | ||
z = abs(diff / se_diff_r) | ||
p = (1 - norm.cdf(z)) | ||
if twotailed: | ||
p *= 2 | ||
|
||
return z, p | ||
elif method == 'zou': | ||
L1 = rz_ci(xy, n, conf_level=conf_level)[0] | ||
U1 = rz_ci(xy, n, conf_level=conf_level)[1] | ||
L2 = rz_ci(ab, n2, conf_level=conf_level)[0] | ||
U2 = rz_ci(ab, n2, conf_level=conf_level)[1] | ||
lower = xy - ab - pow((pow((xy - L1), 2) + pow((U2 - ab), 2)), 0.5) | ||
upper = xy - ab + pow((pow((U1 - xy), 2) + pow((ab - L2), 2)), 0.5) | ||
return lower, upper | ||
else: | ||
raise Exception('Wrong method!') | ||
|
||
#print dependent_corr(.396, .179, .088, 200, method='steiger') | ||
#print independent_corr(.560, .588, 100, 353, method='fisher') | ||
|
||
#print dependent_corr(.396, .179, .088, 200, method='zou') | ||
#print independent_corr(.560, .588, 100, 353, method='zou') |