-
Notifications
You must be signed in to change notification settings - Fork 16
/
CIT_2020_CUP_polecon.R
363 lines (310 loc) · 13.2 KB
/
CIT_2020_CUP_polecon.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
#------------------------------------------------------------------------------#
#------------------------------------------------------------------------------#
# A Practical Introduction to Regression Discontinuity Designs: Foundations
# Authors: Matias D. Cattaneo, Nicolas Idrobo and Rocio Titiunik
#------------------------------------------------------------------------------#
# SOFTWARE WEBSITE: https://rdpackages.github.io/
#------------------------------------------------------------------------------#
# TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS:
# FOREIGN: install.packages('foreign')
# GGPLOT2: install.packages('ggplot2')
# GRID: install.packages('grid')
# LPDENSITY: install.packages('lpdensity')
# RDDENSITY: install.packages('rddensity')
# RDLOCRAND: install.packages('rdlocrand')
# RDROBUST: install.packages('rdrobust')
#------------------------------------------------------------------------------#
#------------------------------------------------------------------------------#
# NOTE: if you are using RDROBUST version 2020 or newer, the option
# masspoints="off" may be needed to replicate the results in the monograph.
# For example:
# out = rdrobust(Y,X)
# should be replaced by:
# out = rdrobust(Y,X,masspoints="off",stdvars="on")
#------------------------------------------------------------------------------#
#------------------------------------------------------------------------------#
# NOTE: if you are using RDDENSITY version 2020 or newer, the option
# masspoints=FALSE may be needed to replicate the results in the monograph.
# For example:
# out = rddensity(X)
# should be replaced by:
# out = rddensity(X,massPoints=FALSE)
#------------------------------------------------------------------------------#
#------------------------------------------------------------------------------#
rm(list=ls())
library(foreign)
library(ggplot2)
library(lpdensity)
library(rddensity)
library(rdrobust)
library(rdlocrand)
# Tables 1, 2, 4, 5, and 6 are only constructed in STATA
# Figures 20, 21, and 22 are only constructed in STATA
# Loading the data and defining the main variables
data <- read.dta("CIT_2020_CUP_polecon.dta")
Y <- data$Y
X <- data$X
T <- data$T
T_X <- T*X
#---------------------#
# Section 2 #
# The Sharp RD Design #
#---------------------#
# Figure 3a
# Raw comparison of means
rdplot(Y, X, nbins = c(2500, 500), p = 0, col.lines = "red", col.dots = "black", title = "",
x.label = "Islamic Margin of Victory", y.label = "Female High School Percentage", y.lim = c(0,70))
# Figure 3b
# Local comparison of means
rdplot(Y[abs(X) <= 50], X[abs(X) <= 50], nbins = c(2500, 500), p = 4, col.lines = "red", col.dots = "black", title = "",
x.label = "Islamic Margin of Victory", y.label = "Female High School Percentage", y.lim = c(0,70))
#-----------#
# Section 3 #
# RD Plots #
#-----------#
# R Snippet 1 (Figure 5)
# Scatter plot
plot(X, Y, xlab = "Score", ylab = "Outcome", col = 1, pch = 20)
abline(v=0)
# Figure 6
# RD plot using 40 bins of equal length
out <- rdplot(Y, X, nbins = c(20,20), binselect = 'esmv', y.lim = c(0,25))
summary(out)
# R Snippet 2 (Figure 7a)
# 40 Evenly-spaced bins
out <- rdplot(Y, X, nbins = c(20,20), binselect = 'es', y.lim = c(0,25))
summary(out)
# R Snippet 3 (Figure 7b)
# 40 Quantile-spaced bins
out <- rdplot(Y, X, nbins = c(20,20), binselect = 'qs', x.lim = c(-100,100), y.lim = c(0,25))
summary(out)
# R Snippet 4 (Figure 8)
# IMSE RD plot with evenly-spaced bins
out <- rdplot(Y, X, binselect = 'es', x.lim = c(-100,100), y.lim = c(0,25))
summary(out)
# R Snippet 5 (Figure 9)
# IMSE RD plot with quantile-spaced bins
out <- rdplot(Y, X, binselect = 'qs', x.lim = c(-100,100), y.lim = c(0,25))
summary(out)
# R Snippet 6 (Figure 10)
# Mimicking variance RD plot with evenly-spaced bins
out <- rdplot(Y, X, binselect = 'esmv')
summary(out)
# R Snippet 7 (Figure 11)
# Mimicking variance RD plot with quantile-spaced bins
out <- rdplot(Y, X, binselect = 'qsmv', x.lim = c(-100,100), y.lim = c(0,25))
summary(out)
#----------------------------------------------#
# Section 4 #
# The Continuity-Based Approach to RD Analysis #
#----------------------------------------------#
# R Snippet 8
# Using two regressions to estimate
out <- lm(Y[X < 0 & X >= -20] ~ X[X < 0 & X >= -20])
left_intercept = out$coefficients[1]
print(left_intercept)
out <- lm(Y[X >= 0 & X <= 20] ~ X[X >= 0 & X <= 20])
right_intercept <- out$coefficients[1]
print(right_intercept)
difference <- right_intercept - left_intercept
print(paste("The RD estimator is", difference, sep = " "))
# R Snippet 9
# Using one regression to estimate
T_X <- X * T
out <- lm(Y[X >= -20 & X <= 20] ~ X[X >= -20 & X <= 20] + T[X >= -20 & X <= 20] + T_X[X >= -20 & X <= 20])
summary(out)
# R Snippet 10
# Generating triangular weights
w <- NA
w[X < 0 & X >= -20] <- 1 - abs(X[X < 0 & X >= -20] / 20)
w[X >= 0 & X <= 20] <- 1 - abs(X[X >= 0 & X <= 20] / 20)
# R Snippet 11
# Using two regressions and weights to estimate
out <- lm(Y[X < 0] ~ X[X < 0], weights = w[X < 0])
left_intercept <- out$coefficients[1]
out <- lm(Y[X >= 0] ~ X[X >= 0], weights = w[X >= 0])
right_intercept <- out$coefficients[1]
difference <- right_intercept - left_intercept
print(paste("The RD estimator is", difference, sep = " "))
# R Snippet 12
# Using rdrobust with uniform weights
out <- rdrobust(Y, X, kernel = 'uniform', p = 1, h = 20)
summary(out)
# R Snippet 13
# Using rdrobust with triangular weights
out <- rdrobust(Y, X, kernel = 'triangular', p = 1, h = 20)
summary(out)
# R Snippet 14
# Using rdrobust with triangular weights and p = 2
out <- rdrobust(Y, X, kernel = 'triangular', p = 2, h = 20)
summary(out)
# R Snippet 15
# Using rdbwselect with mserd bandwidth
out <- rdbwselect(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd')
summary(out)
# R Snippet 16
# Using rdbwselect with msetwo bandwidth
out <- rdbwselect(Y, X, kernel = 'triangular', p = 1, bwselect = 'msetwo')
summary(out)
# R Snippet 17
# Using rdrobust with mserd bandwidth
out <- rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd')
summary(out)
# R Snippet 18
# Using rdrobust to show the objects it returns
rdout <- rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd')
print(names(rdout)[1:7])
print(names(rdout)[8:15])
print(names(rdout)[16:23])
print(names(rdout)[24:27])
print(rdout$beta_Y_p_r)
print(rdout$beta_Y_p_l)
# R Snippet 19 (Figure 15)
# Using rdrobust and showing the associated rdplot
bandwidth <- rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd')$bws[1,1]
out <- rdplot(Y[abs(X) <= bandwidth], X[abs(X) <= bandwidth], p = 1, kernel = 'triangular')
summary(out)
# R Snippet 20
# Using rdrobust without regularization term
out <- rdrobust(Y, X, kernel = 'triangular', scaleregul = 0, p = 1, bwselect = 'mserd')
summary(out)
# R Snippet 21
# Using rdrobust with default options
out <- rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd')
summary(out)
# R Snippet 22
# Using rdrobust with default options and showing all the output
out <- rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd', all = TRUE)
summary(out)
# R Snippet 23
# Using rdrobust with cerrd bandwidth
out <- rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'cerrd')
summary(out)
# R Snippet 24
# Using rdbwselect with all the bandwidths
out <- rdbwselect(Y, X, kernel = 'triangular', p = 1, all = TRUE)
summary(out)
# R Snippet 25
# Using rdbwselect with covariates
Z <- cbind(data$vshr_islam1994, data$partycount, data$lpop1994,
data$merkezi, data$merkezp, data$subbuyuk, data$buyuk)
colnames(Z) <- c("vshr_islam1994", "partycount", "lpop1994",
"merkezi", "merkezp", "subbuyuk", "buyuk")
out <- rdbwselect(Y, X, covs = Z, kernel = 'triangular', scaleregul = 1, p = 1, bwselect = 'mserd')
summary(out)
# R Snippet 26
# Using rdrobust with covariates
Z <- cbind(data$vshr_islam1994, data$partycount, data$lpop1994,
data$merkezi, data$merkezp, data$subbuyuk, data$buyuk)
colnames(Z) <- c("vshr_islam1994", "partycount", "lpop1994",
"merkezi", "merkezp", "subbuyuk", "buyuk")
out <- rdrobust(Y, X, covs = Z, kernel = 'triangular', scaleregul = 1, p = 1, bwselect = 'mserd')
summary(out)
# R Snippet 27
# Using rdrobust with clusters
out <- rdrobust(Y, X, kernel = 'triangular', scaleregul = 1, p = 1, bwselect = 'mserd', cluster = data$prov_num)
summary(out)
# R Snippet 28
# Using rdrobust with clusters and covariates
Z <- cbind(data$vshr_islam1994, data$partycount, data$lpop1994,
data$merkezi, data$merkezp, data$subbuyuk, data$buyuk)
colnames(Z) <- c("vshr_islam1994", "partycount", "lpop1994",
"merkezi", "merkezp", "subbuyuk", "buyuk")
out <- rdrobust(Y, X, covs = Z, kernel = 'triangular', scaleregul = 1, p = 1,
bwselect = 'mserd', cluster = data$prov_num)
summary(out)
#-----------------------------------------------#
# Section 5 #
# Validation and Falsification of the RD Design #
#-----------------------------------------------#
# Figure 16
# RD plots for predetermined covariates
rdplot(data$lpop1994, X,
x.label = "Score", y.label = "", title = "")
rdplot(data$partycount, X,
x.label = "Score", y.label = "", title = "")
rdplot(data$vshr_islam1994, X,
x.label = "Score", y.label = "", title = "")
rdplot(data$i89, X,
x.label = "Score", y.label = "", title = "", x.lim = c(-100,100))
rdplot(data$merkezp, X,
x.label = "Score", y.label = "", title = "")
rdplot(data$merkezi, X,
x.label = "Score", y.label = "", title = "")
# R Snippet 29
# Using rdrobust on lpop1994
out <- rdrobust(data$lpop1994, X)
summary(out)
# Formal continuity-based analysis for covariates using CER-optimal bandwidth (not reported in the text)
summary(rdrobust(data$hischshr1520m, X, bwselect = 'cerrd'))
summary(rdrobust(data$i89, X, bwselect = 'cerrd'))
summary(rdrobust(data$vshr_islam1994, X, bwselect = 'cerrd'))
summary(rdrobust(data$partycount, X, bwselect = 'cerrd'))
summary(rdrobust(data$lpop1994, X, bwselect = 'cerrd'))
summary(rdrobust(data$merkezi, X, bwselect = 'cerrd'))
summary(rdrobust(data$merkezp, X, bwselect = 'cerrd'))
summary(rdrobust(data$subbuyuk, X, bwselect = 'cerrd'))
summary(rdrobust(data$buyuk, X, bwselect = 'cerrd'))
# R Snippet 30
# Using rdplot to show the rdrobust effect for lpop1994
bandwidth <- rdrobust(data$lpop1994, X)$bws[1,1]
xlim <- ceiling(bandwidth)
rdplot(data$lpop1994[abs(X) <= bandwidth], X[abs(X) <= bandwidth],
p = 1, kernel = 'triangular', x.lim = c(-xlim, xlim), x.label = "Score",
y.label = "", title = "")
# Figure 17
# Graphical illustration of local linear RD effects for predetermined covariates
bandwidth <- rdrobust(data$lpop1994, X)$bws[1,1]
xlim <- ceiling(bandwidth)
rdplot(data$lpop1994[abs(X) <= bandwidth], X[abs(X) <= bandwidth],
p = 1, kernel = 'triangular', x.lim = c(-xlim, xlim), x.label = "Score",
y.label = "", title = "")
bandwidth <- rdrobust(data$partycount, X)$bws[1,1]
xlim <- ceiling(bandwidth)
rdplot(data$partycount[abs(X) <= bandwidth], X[abs(X) <= bandwidth],
p = 1, kernel = 'triangular', x.lim = c(-xlim, xlim), x.label = "Score",
y.label = "", title = "")
bandwidth <- rdrobust(data$vshr_islam1994, X)$bws[1,1]
xlim <- ceiling(bandwidth)
rdplot(data$vshr_islam1994[abs(X) <= bandwidth], X[abs(X) <= bandwidth],
p = 1, kernel = 'triangular', x.lim = c(-xlim, xlim), x.label = "Score",
y.label = "", title = "")
bandwidth <- rdrobust(data$i89, X)$bws[1,1]
xlim <- ceiling(bandwidth)
rdplot(data$i89[abs(X) <= bandwidth], X[abs(X) <= bandwidth],
p = 1, kernel = 'triangular', x.lim = c(-xlim, xlim), x.label = "Score",
y.label = "", title = "")
# R Snippet 31
# Binomial test
binom.test(53, 100, 1/2)
# R Snippet 32
# Using rddensity
out <- rddensity(X)
summary(out)
# Figure 19a
# Histogram
bw_left <- as.numeric(rddensity(X)$h[1]); bw_right = as.numeric(rddensity(X)$h[2]);
tempdata <- as.data.frame(X); colnames(tempdata) = c("v1");
plot2 <- ggplot(data=tempdata, aes(tempdata$v1)) + theme_bw(base_size = 17) +
geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(-bw_left, 0, 1), fill = "blue", col = "black", alpha = 1) +
geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(0, bw_right, 1), fill = "red", col = "black", alpha = 1) +
labs(x = "Score", y = "Number of Observations") + geom_vline(xintercept = 0, color = "black")
plot2
# Figure 19b
# Estimated Density
est1 <- lpdensity(data = X[X < 0 & X >= -bw_left], grid = seq(-bw_left, 0, 0.1), bwselect = "IMSE",
scale = sum(X < 0 & X >= -bw_left) / length(X))
est2 <- lpdensity(data = X[X >= 0 & X <= bw_right], grid = seq(0, bw_right, 0.1), bwselect = "IMSE",
scale = sum(X >= 0 & X <= bw_right) / length(X))
plot1 <- lpdensity.plot(est1, est2, CIshade = 0.2, lcol = c(4, 2), CIcol = c(4, 2), legendGroups = c("Control", "Treatment"))+
labs(x = "Score", y = "Density") + geom_vline(xintercept = 0, color = "black") +
theme_bw(base_size = 17)+theme(legend.position = c(0.8, 0.85))
plot1
# R Snippet 33
# Using rdrobust with the cutoff equal to 1
out <- rdrobust(Y[X >= 0], X[X >= 0], c = 1)
summary(out)
# R Snippet 34
# Using rdrobust for the donut-hole approach
out <- rdrobust(Y[abs(X) >= 0.3], X[abs(X) >= 0.3])
summary(out)