forked from cab79/Matlab_files
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathboxcoxlm.m
144 lines (133 loc) · 5.44 KB
/
boxcoxlm.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
function [LambdaHat,LambdaInterval]=boxcoxlm(y,X,PlotLogLike,LambdaValues,alpha)
% BOXCOXLM  Choose a Box-Cox power transformation for a linear model.
%
% This function helps finding a power transformation y^Lambda for a linear
% model (multiple linear regression, in the form used by the function
% "regress" of MATLAB's statistical toolbox).
% The purpose of using it is finding a good value for lambda, transforming
% the dependent variable through a Box-Cox power transformation:
%       y(lambda) = (y^lambda - 1)/lambda   for lambda ~= 0
%       y(lambda) = log(y)                  for lambda == 0
%
% It calculates the maximized log-likelihood (up to a constant) for all
% values in the vector LambdaValues (default: -2:0.01:2) and plots the
% relation; in addition it emphasizes the 1-alpha confidence region
% for lambda's best value (the one that maximizes the log-likelihood).
%
% Inputs:
%   y            - response vector (row or column; used as a column).
%                  If y contains non-positive values it is shifted to
%                  (y-min(y))+realmin before the analysis and a message is
%                  printed; LambdaHat then refers to the shifted data.
%   X            - design matrix with one row per element of y
%                  (include a column of ones for an intercept).
%   PlotLogLike  - 1 (default) to plot, 0 for no plot. [] selects the default.
%   LambdaValues - vector of candidate lambdas (row or column).
%                  Default -2:.01:2. [] selects the default.
%   alpha        - confidence interval level is 1-alpha. Default 0.05.
%                  [] selects the default.
%
% Outputs:
%   LambdaHat      - the value in LambdaValues that maximizes the
%                    log-likelihood (mean of the maximizers if tied).
%   LambdaInterval - [lower upper] bounds of the set of candidate lambdas
%                    whose log-likelihood lies within 0.5*chi2inv(1-alpha,1)
%                    of the maximum.
%
% Note: chi2inv is called only when a plot or LambdaInterval is requested.
%
% usage: 1) LambdaHat=boxcoxlm(y,X)
%               y & X are as defined in the function "regress",
%               LambdaHat is the value of Lambda in -2:.01:2 that
%               maximizes the log-likelihood(lambda).
%               This form will also produce a plot
%        2) LambdaHat=boxcoxlm(y,X,0) - calculate LambdaHat without plotting
%        3) LambdaHat=boxcoxlm(y,X,1,LambdaValues) - enables the user to
%               replace -2:.01:2 by any other region and precision
%        4) LambdaHat=boxcoxlm(y,X,1,LambdaValues,alpha) - chooses a
%               different value than the default (alpha=0.05) for the
%               confidence interval
%        5) [LambdaHat,LambdaInterval]=boxcoxlm(...) - gives the
%               confidence interval for lambda in LambdaInterval
%
% Examples:
%        1) X= [ones(10,1) (1:10)'];
%           y = (X * [10;1] + normrnd(0,0.2,10,1)).^2;
%           LambdaHat=boxcoxlm(y,X)
%
%           This should produce LambdaHat of roughly 0.5
%
%        2) X= [ones(10,1) (1:10)'];
%           y = (X * [10;1] + normrnd(0,0.2,10,1)).^2;
%           [LambdaHat,LambdaInterval]=boxcoxlm(y,X)
%
%           This should produce LambdaHat of roughly 0.5, and its confidence
%           interval in LambdaInterval
%
%        3) X= [ones(10,1) (1:10)'];
%           y = (X * [10;1] + normrnd(0,0.2,10,1)).^2;
%           LambdaHat=boxcoxlm(y,X,0)
%
%           Identical to the first example, but does not produce a plot
%
%        4) X= [ones(10,1) (1:10)'];
%           y = (X * [10;1] + normrnd(0,0.2,10,1)).^0.5; % Note 0.5 replaced 2 in the power
%           LambdaHat=boxcoxlm(y,X,0,[-2:.01:5])
%
%           LambdaHat should be roughly 2, and the Lambda possible range
%           was replaced by [-2:.01:5] for better coverage
%
%        5) X= [ones(10,1) (1:10)'];
%           y = (X * [10;1] + normrnd(0,0.2,10,1)).^0.5;
%           LambdaHat=boxcoxlm(y,X,0,[-2:.01:5],0.2)
%
%           Same as the last example, 80% confidence interval
%
%
% based on G. E. P. Box, D. R. Cox, "An Analysis of Transformations", Journal of the Royal Statistical Society. Series B (Methodological), Vol. 26, No. 2 (1964) , pp. 211-252
%
% Code written by Hovav Dror, hovav@hotmail.com, March 2006

if nargin<2
    fprintf('minimum form: boxcoxlm(y,X) \n');
    fprintf('optional form:[LambdaHat,LambdaInterval]=boxcoxlm(y,X,PlotLogLike,LambdaValues,alpha) \n');
    fprintf('see function for more details\n');
    % Assign the outputs so a caller that requested them does not get an
    % "output argument not assigned" error on top of the usage message.
    LambdaHat=[];
    LambdaInterval=[];
    return;
end

% put default values if needed (an empty argument also selects the default)
if nargin<3 || isempty(PlotLogLike)
    PlotLogLike=1; % Default - plot; if PlotLogLike=0, no plots
end
if nargin<4 || isempty(LambdaValues)
    LambdaValues= -2:.01:2 ;
end
if nargin<5 || isempty(alpha)
    alpha=0.05;
end

% Normalize shapes: y must be a column for the regression, and LambdaValues
% must be a row so that it can be indexed/plotted consistently.
y=y(:);
LambdaValues=LambdaValues(:)';
n=length(y);
if size(X,1)~=n
    error('boxcoxlm:sizeMismatch', ...
        'X must have one row per element of y (expected %d rows, got %d).',n,size(X,1));
end

MinY=min(y);
if MinY<=0 % if y contains non-positive values, the transformation is slightly different
    fprintf('y contains non-positive values! Using y-min(y)+realmin \n');
    % Subtract the minimum first and add realmin afterwards: adding realmin
    % to a non-tiny magnitude is lost to rounding, which would leave an
    % exact zero in y and make log(y) equal to -Inf.
    y=(y-MinY)+realmin;
end

SigmaLogY=sum(log(y));

nLambda=numel(LambdaValues);
LogLike=zeros(1,nLambda);
for k=1:nLambda
    Lambd=LambdaValues(k);
    if Lambd~=0
        Ylambda=(y.^Lambd-1)/Lambd;
    else
        Ylambda=log(y);
    end
    % Least-squares residuals of the transformed response on X. This gives
    % the same residual sum of squares as y'*(I-X*inv(X'*X)*X')*y without
    % forming an explicit inverse or an n-by-n projection matrix.
    Resid=Ylambda-X*(X\Ylambda);
    VarHat=(Resid'*Resid)/n; % sigma^2 estimate by least squares
    % Maximized log-likelihood up to a constant, for this fixed lambda
    LogLike(k)=-0.5*n*log(VarHat)+(Lambd-1)*SigmaLogY;
end

% Best Lambda (mean of the maximizers when several candidates tie):
LambdaHat=mean(LambdaValues(LogLike==max(LogLike)));

% Calculate Confidence Interval
if PlotLogLike==1 || nargout>1
    LogLike2=max(LogLike)-LogLike;
    LogLikeInterval=LogLike2<0.5*chi2inv(1-alpha,1) ;
    if nargout>1
        LambdaInterval=[min(LambdaValues(LogLikeInterval)) max(LambdaValues(LogLikeInterval)) ];
    end
end

% Plots:
if PlotLogLike==1
    plot(LambdaValues,LogLike); % plot the Log-Like estimate (up to a constant) vs. Lambda
    AxisValues=axis;
    hold on
    % plot a dotted line showing the best lambda
    plot([LambdaHat LambdaHat],[AxisValues(3) max(LogLike)],':');
    if LambdaHat<max(LambdaValues) && LambdaHat>min(LambdaValues)
        stext=sprintf('%1.1f',LambdaHat);
    else
        % The maximum sits on the edge of the searched grid, so the true
        % optimum is probably outside LambdaValues.
        stext='out of region';
    end
    text(LambdaHat,mean([AxisValues(3) max(LogLike)]),stext,'HorizontalAlignment','center');
    % Now plot the 1-alpha confidence interval as a dotted step: inside the
    % interval it sits at the lowest log-likelihood of the interval,
    % outside it drops to the bottom of the axes.
    LogLike2(LogLikeInterval)=min(LogLike(LogLikeInterval));
    LogLike2(~LogLikeInterval)=AxisValues(3);
    stext=sprintf('%2.0f%% confidence interval for \\lambda',round(100*(1-alpha)));
    text(LambdaHat,mean([max(LogLike) max(LogLike2)]),stext,'HorizontalAlignment','center');
    plot(LambdaValues,LogLike2,':');
    hold off
    % last, put titles
    xlabel('\lambda');
    ylabel('Max Log-Likelihood(\lambda), up to a constant');
    title('Box-Cox Power Transformation for a Linear Model');
end