-
Notifications
You must be signed in to change notification settings - Fork 197
/
Copy pathclean_healthdata.m
66 lines (56 loc) · 1.47 KB
/
clean_healthdata.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
% Cleans the infant health / alcohol consumption data.
% Start from an empty workspace and a clean command window.
clear;
clc;

% Read the raw ASCII file into the numeric matrix "healthdata"
% (one row per observation).
healthdata = load('healthdata.txt');

% Extract the columns used later in this script.
% NOTE(review): column meanings are inferred from the variable names only;
% confirm against the data set's codebook.
drink          = healthdata(:, 1);
education      = healthdata(:, 5);
havekid_before = healthdata(:, 7);
famincome      = healthdata(:, 8);
parent_drink   = healthdata(:, 9);
num_visit      = healthdata(:, 10);
healthprob     = healthdata(:, 12);
% Create the three-category outcome variables r (from drink) and
% y (from num_visit). Both use the same recoding:
%   source value 1       -> category 1
%   source value 2 or 3  -> category 2
%   any other value      -> category 3 (the initial fill value)
nobs = length(drink);

r = 3 * ones(nobs, 1);
r(drink == 1) = 1;
r(drink == 2 | drink == 3) = 2;

y = 3 * ones(nobs, 1);
y(num_visit == 1) = 1;
y(num_visit == 2 | num_visit == 3) = 2;
%----------------
% Create the dummy indicator matrix for the r-outcome and the lower/upper
% truncation bounds associated with each r category.
%
%   D(i,k) = 1 when r(i) == k (k = 1,2,3), and 0 otherwise.
%   r(i) == 1      ->  a_r(i) = -999,  b_r(i) = 0
%   r(i) == 2      ->  a_r(i) = 0,     b_r(i) = 1
%   anything else  ->  a_r(i) = 1,     b_r(i) = 999
%
% NOTE(review): -999 / 999 look like finite stand-ins for -Inf / +Inf;
% confirm against the code that consumes a_r and b_r.
%
% Implemented with logical masks instead of a per-observation loop.
%---------------
is_r1 = (r == 1);
is_r2 = (r == 2);

D = zeros(nobs,3);
D(is_r1,1) = 1;
D(is_r2,2) = 1;
D(r == 3,3) = 1;

% Start every row at the "else" bounds, then overwrite categories 1 and 2.
a_r = ones(nobs,1);
b_r = 999*ones(nobs,1);
a_r(is_r1) = -999;  b_r(is_r1) = 0;
a_r(is_r2) = 0;     b_r(is_r2) = 1;
%-------------------
% Create the lower and upper truncation bounds for the y-outcome.
%
%   y(i) == 1      ->  a_y(i) = -999,  b_y(i) = 0
%   y(i) == 2      ->  a_y(i) = 0,     b_y(i) = 1
%   anything else  ->  a_y(i) = 1,     b_y(i) = 999
%
% NOTE(review): -999 / 999 look like finite stand-ins for -Inf / +Inf;
% confirm against the code that consumes a_y and b_y.
%
% Implemented with logical masks instead of a per-observation loop.
%----------------
is_y1 = (y == 1);
is_y2 = (y == 2);

% Start every row at the "else" bounds, then overwrite categories 1 and 2.
a_y = ones(nobs,1);
b_y = 999*ones(nobs,1);
a_y(is_y1) = -999;  b_y(is_y1) = 0;
a_y(is_y2) = 0;     b_y(is_y2) = 1;
% Assemble the regressor matrices.
% xy    : covariates without a constant column.
% xbary : xy with the three r-category dummy columns of D appended.
% xr    : a constant column followed by three covariates.
% NOTE(review): presumably xy/xbary feed the y-equation and xr the
% r-equation; confirm against the estimation script.
xy    = [education, havekid_before, famincome, healthprob];
xbary = [xy, D];
xr    = [ones(nobs,1), education, famincome, parent_drink];

% Write the cleaned variables to health_clean.mat.
save('health_clean', 'y', 'r', 'xbary', 'xr', 'a_r', 'b_r', 'a_y', 'b_y');