Skip to content
This repository has been archived by the owner on Jun 17, 2024. It is now read-only.

Commit

Permalink
Merge branch 'master' of github.com:MicrosoftResearch/Azimuth
Browse files Browse the repository at this point in the history
  • Loading branch information
nfusi committed May 12, 2016
2 parents 9cb9ec0 + b7b658b commit 1d85d84
Showing 1 changed file with 120 additions and 0 deletions.
120 changes: 120 additions & 0 deletions azimuth/corrstats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""
Functions for calculating the statistical significant differences between two dependent or independent correlation
coefficients.
The Fisher and Steiger method is adopted from the R package http://personality-project.org/r/html/paired.r.html
and is described in detail in the book 'Statistical Methods for Psychology'
The Zou method is adopted from http://seriousstats.wordpress.com/2012/02/05/comparing-correlations/
Credit goes to the authors of above mentioned packages!
Author: Philipp Singer (www.philippsinger.info)
------------------------------------------------------------
README.md:
CorrelationStats
This Python script enables you to compute statistical significance tests on both dependent and independent correlation coefficients. For each case two methods to choose from are available.
For details, please refer to: http://www.philippsinger.info/?p=347
#copied from on 4/24/2015 from https://github.com/psinger/CorrelationStats/blob/master/corrstats.py
"""


from __future__ import division

__author__ = 'psinger'

import numpy as np
from scipy.stats import t, norm
from math import atanh, pow
from numpy import tanh

def rz_ci(r, n, conf_level = 0.95):
zr_se = pow(1/(n - 3), .5)
moe = norm.ppf(1 - (1 - conf_level)/float(2)) * zr_se
zu = atanh(r) + moe
zl = atanh(r) - moe
return tanh((zl, zu))

def rho_rxy_rxz(rxy, rxz, ryz):
num = (ryz-1/2.*rxy*rxz)*(1-pow(rxy,2)-pow(rxz,2)-pow(ryz,2))+pow(ryz,3)
den = (1 - pow(rxy,2)) * (1 - pow(rxz,2))
return num/float(den)

def dependent_corr(xy, xz, yz, n, twotailed=True, conf_level=0.95, method='steiger'):
"""
Calculates the statistic significance between two dependent correlation coefficients
@param xy: correlation coefficient between x and y
@param xz: correlation coefficient between x and z
@param yz: correlation coefficient between y and z
@param n: number of elements in x, y and z
@param twotailed: whether to calculate a one or two tailed test, only works for 'steiger' method
@param conf_level: confidence level, only works for 'zou' method
@param method: defines the method uses, 'steiger' or 'zou'
@return: t and p-val
"""
if method == 'steiger':
d = xy - xz
determin = 1 - xy * xy - xz * xz - yz * yz + 2 * xy * xz * yz
av = (xy + xz)/2
cube = (1 - yz) * (1 - yz) * (1 - yz)

t2 = d * np.sqrt((n - 1) * (1 + yz)/(((2 * (n - 1)/(n - 3)) * determin + av * av * cube)))
p = 1 - t.cdf(abs(t2), n - 2)

if twotailed:
p *= 2

return t2, p
elif method == 'zou':
L1 = rz_ci(xy, n, conf_level=conf_level)[0]
U1 = rz_ci(xy, n, conf_level=conf_level)[1]
L2 = rz_ci(xz, n, conf_level=conf_level)[0]
U2 = rz_ci(xz, n, conf_level=conf_level)[1]
rho_r12_r13 = rho_rxy_rxz(xy, xz, yz)
lower = xy - xz - pow((pow((xy - L1), 2) + pow((U2 - xz), 2) - 2 * rho_r12_r13 * (xy - L1) * (U2 - xz)), 0.5)
upper = xy - xz + pow((pow((U1 - xy), 2) + pow((xz - L2), 2) - 2 * rho_r12_r13 * (U1 - xy) * (xz - L2)), 0.5)
return lower, upper
else:
raise Exception('Wrong method!')

def independent_corr(xy, ab, n, n2 = None, twotailed=True, conf_level=0.95, method='fisher'):
"""
Calculates the statistic significance between two independent correlation coefficients
@param xy: correlation coefficient between x and y
@param xz: correlation coefficient between a and b
@param n: number of elements in xy
@param n2: number of elements in ab (if distinct from n)
@param twotailed: whether to calculate a one or two tailed test, only works for 'fisher' method
@param conf_level: confidence level, only works for 'zou' method
@param method: defines the method uses, 'fisher' or 'zou'
@return: z and p-val
"""

if method == 'fisher':
xy_z = 0.5 * np.log((1 + xy)/(1 - xy))
xz_z = 0.5 * np.log((1 + ab)/(1 - ab))
if n2 is None:
n2 = n

se_diff_r = np.sqrt(1/(n - 3) + 1/(n2 - 3))
diff = xy_z - xz_z
z = abs(diff / se_diff_r)
p = (1 - norm.cdf(z))
if twotailed:
p *= 2

return z, p
elif method == 'zou':
L1 = rz_ci(xy, n, conf_level=conf_level)[0]
U1 = rz_ci(xy, n, conf_level=conf_level)[1]
L2 = rz_ci(ab, n2, conf_level=conf_level)[0]
U2 = rz_ci(ab, n2, conf_level=conf_level)[1]
lower = xy - ab - pow((pow((xy - L1), 2) + pow((U2 - ab), 2)), 0.5)
upper = xy - ab + pow((pow((U1 - xy), 2) + pow((ab - L2), 2)), 0.5)
return lower, upper
else:
raise Exception('Wrong method!')

#print dependent_corr(.396, .179, .088, 200, method='steiger')
#print independent_corr(.560, .588, 100, 353, method='fisher')

#print dependent_corr(.396, .179, .088, 200, method='zou')
#print independent_corr(.560, .588, 100, 353, method='zou')

0 comments on commit 1d85d84

Please sign in to comment.