-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPearson.m
93 lines (88 loc) · 2.55 KB
/
Pearson.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
function varargout = Pearson(x, y, numSides)
% [pearsonCoef, pVal] = Pearson(x, y, numSides)
% Calculates the Pearson product-moment correlation coefficient
%  INPUTS:
%   -x,y: 1D arrays of equal length (row or column orientation is accepted).
%         Pairs in which either x or y is non-finite (NaN/Inf) are dropped
%         before the coefficient is computed.
%  OPTIONAL:
%   -numSides: (defaults to 1)
%              Set to 1 if only positive correlations are significant,
%              2 if either positive or negative correlations are
%              significant. Any value other than 1 is treated as 2.
%  OUTPUTS:
%   -pearsonCoef: value between -1 and 1, characterizing correlation.
%                 NaN if fewer than 2 finite (x, y) pairs are available.
%  OPTIONAL
%   -pVal: probability pearsonCoef generated by uncorrelated
%          data.  If fewer than 4 finite pairs remain, pVal will be NaN

% Because this routine is often called from deep within other routines,
% must perform many sanity checks to ensure that garbage isn't being fed in
if nargin < 2 || nargin > 3
  help Pearson
  error('Invalid number of input arguments')
elseif nargout < 1 || nargout > 2
  help Pearson
  error('Invalid number of returned arguments')
end
if nargin == 2
  numSides = 1;
end

% Normalize x to a column vector
numX = size(x, 1);
if numX == 1
  x = x';
  numX = size(x, 1);
elseif numX == 0 || size(x, 2) ~= 1
  fprintf(2, 'Size of X = (%g, %g)\n', numX, size(x, 2));
  error('X and Y must be 1D arrays with equal length')
end

% Normalize y to a row vector
numY = size(y, 2);
if numY == 1
  y = y';
  numY = size(y, 2);
elseif numY == 0 || size(y, 1) ~= 1
  % report (rows, cols); here numY holds the column count
  fprintf(2, 'Size of Y = (%g, %g)\n', size(y, 1), numY);
  error('X and Y must be 1D arrays with equal length')
end

if numY ~= numX
  fprintf(2, 'Length(X)=%d, length(Y)=%d\n', numX, numY);
  error('X and Y must have equal length')
end
% end of sanity checks

% Keep only the pairs where both values are finite. A logical mask is
% applied in each vector's own orientation so that x stays a column and y
% stays a row even when nothing survives the filter.
keep = isfinite(x) & isfinite(y');
x = x(keep);
y = y(keep');
numX = length(x);

% compute the Pearson correlation coefficient by performing a dot product
% on z-scored data
if numX < 2
  % No pairs (or a single pair) cannot define a correlation. Without this
  % guard the empty case would evaluate to 0 / (-1) and masquerade as a
  % valid "uncorrelated" result.
  pearsonCoef = NaN;
else
  pearsonCoef = (zscore(y) * zscore(x)) / (numX - 1);
end

if nargout == 1
  % only the Pearson coefficient was requested, return
  varargout = {pearsonCoef};
  return
end

% compute a p-value on observing this data
if numX < 4
  % insufficient numX to compute a p-value (sqrt(numX - 3) below needs
  % numX >= 4 to be positive)
  pVal = NaN;
elseif pearsonCoef >= 1
  % perfect correlation (occasionally round-off produces coef = 1 + eps)
  % corresponds to Z = Inf;
  pVal = 0.0;
elseif pearsonCoef <= -1
  % perfect anti-correlation
  % corresponds to Z = -Inf;
  if numSides == 1
    pVal = 1.0;
  else
    pVal = 0.0;
  end
else
  % the typical case.  Apply the Fisher transformation (atanh) and compute
  % a p-value by assuming the scaled result is standard-Gaussian
  Z = sqrt(numX - 3) * atanh(pearsonCoef);
  if numSides == 1
    % upper-tail probability of a standard normal
    pVal = 0.5 * erfc(Z / sqrt(2));
  else
    % two-tailed probability
    pVal = erfc(abs(Z) / sqrt(2));
  end
end
varargout = {pearsonCoef, pVal};
return