diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..7a73a41
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,2 @@
\ No newline at end of file
diff --git a/Project.toml b/Project.toml
index 350f3b9..81079f2 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,15 +1,17 @@
name = "HDMjl"
uuid = "8de29b41-9195-4bda-bbba-e5831b2a12ad"
authors = ["Jhon Flores Rojas", "Rodrigo Grijalba", "Alexander Quispe", "Anzony Quispe"]
-version = "0.0.10"
+version = "0.0.11"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
DataFrames = "0.22.7, 1"
diff --git a/README.md b/README.md
index 186e225..b855120 100644
--- a/README.md
+++ b/README.md
@@ -67,44 +67,57 @@ The Post-Lasso procedure fits an OLS regression excluding the variables not prev
We can estimate the models using Lasso
-julia> rlasso(X, Y, post = false)
-Dict{String, Any} with 15 entries:
- "tss" => 6512.49
- "dev" => [6.75884, -13.5819, -2.81122, -3.94462, 17.3342, -1.2805, 3.16503, -4.74853, 6.944, 15.2907 … …
- "model" => [0.390896 0.179228 … 2.36678 2.01764; -0.720606 -1.12332 … 0.169248 -0.831435; … ; 1.2457 0.7669…
- "loadings" => [1.70326, 1.86338, 2.02143, 1.85829, 1.5416, 1.74625, 1.94735, 1.38887, 1.7228, 1.59366 … 1.65…
- "sigma" => 1.71111
- "lambda0" => 81.3601
- "lambda" => [138.577, 151.605, 164.464, 151.191, 125.424, 142.075, 158.436, 112.998, 140.167, 129.66 … 134…
- "intercept" => -0.118988
- "iter" => 16
- "residuals" => [1.8377, -2.33523, 0.707157, -0.0587436, 3.81226, 0.637385, 0.117754, -0.209206, 1.49168, 2.2032…
- "rss" => 289.863
- "index" => Bool[1, 1, 1, 0, 0, 0, 0, 0, 0, 0 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- "beta" => [4.15731, 4.35612, 3.69875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
- "options" => Dict{String, Any}("intercept"=>true, "post"=>false, "meanx"=>[-0.217494 0.000263084 … -0.0073734…
- "coefficients" => [-0.118988, 4.15731, 4.35612, 3.69875, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0,…
+julia> lasso_reg = rlasso(X, Y, post = false)
+julia> r_summary(lasso_reg)
+ Post-Lasso Estimation: false
+ Total number of variables: 100
+ Number of selected variables: 9
+ ---
+============ ==============
+ Variable Estimate
+============ ==============
+ Intercept -0.0588327
+ V 1 4.84428
+ V 2 4.73331
+ V 3 4.99116
+ V 4 -0.0166025
+ V 43 -0.10963
+ V 64 0.000400857
+ V 69 -0.0359718
+ V 94 0.00666321
+ V 100 0.166262
+============ ==============
+ ----
+ Multiple R-squared: 0.9883821717933302
+ Adjusted R-squared: 0.9872203889726632
and Post-Lasso
-julia> rlasso(X, Y, post = true)
-Dict{String, Any} with 15 entries:
- "tss" => 6512.49
- "dev" => [6.75884, -13.5819, -2.81122, -3.94462, 17.3342, -1.2805, 3.16503, -4.74853, 6.944, 15.2907 … …
- "model" => [0.390896 0.179228 … 2.36678 2.01764; -0.720606 -1.12332 … 0.169248 -0.831435; … ; 1.2457 0.7669…
- "loadings" => [0.93007, 0.992403, 0.863634, 1.00966, 0.876833, 0.858748, 1.00182, 0.892263, 1.07537, 1.01695 …
- "sigma" => 0.925277
- "lambda0" => 81.3601
- "lambda" => [75.6706, 80.7419, 70.2653, 82.1458, 71.3392, 69.8678, 81.5081, 72.5946, 87.4919, 82.7389 … 68…
- "intercept" => 0.0258985
- "iter" => 5
- "residuals" => [0.733002, 0.22571, 1.06845, 1.34666, 0.818648, 0.575327, -0.519747, 0.985208, -0.000283277, -0.…
- "rss" => 84.7576
- "index" => Bool[1, 1, 1, 0, 0, 0, 0, 0, 0, 0 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- "beta" => [4.94557, 5.14366, 4.8095, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…
- "options" => Dict{String, Any}("intercept"=>true, "post"=>true, "meanx"=>[-0.217494 0.000263084 … -0.00737349…
- "coefficients" => [0.0258985, 4.94557, 5.14366, 4.8095, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, …
+julia> post_lasso_reg = rlasso(X, Y, post = true)
+julia> r_summary(post_lasso_reg)
+ Post-Lasso Estimation: true
+ Total number of variables: 100
+ Number of selected variables: 3
+ ---
+============ ==============
+ Variable Estimate
+============ ==============
+ Intercept -0.00682754
+ V 1 5.00958
+ V 2 4.93178
+ V 3 5.17705
+============ ==============
+ ----
+ Multiple R-squared: 0.9878595381779292
+ Adjusted R-squared: 0.9874801487459894
### Inference on Target Coefficients through Orthogonal Estimating Equations
@@ -130,34 +143,31 @@ julia> d = GrowthData[:, 3];
julia> X = Matrix(GrowthData[:, Not(1, 2, 3)]);
-julia> rlassoEffect(X, y, d, method = "double selection")
-Dict{String, Any} with 10 entries:
- "alpha" => -0.0500059
- "t" => -3.16666
- "se" => 0.0157914
- "no_select" => 0
- "coefficients_reg" => [-0.406451, -0.0500059, -0.0782423, -0.574676, 0.0511529, -0.0470218, 0.212279, -0.000376038, 0…
- "sample_size" => 90
- "coefficient" => -0.0500059
- "selection_index" => Bool[1, 0, 1, 0, 1, 0, 0, 0, 0, 0 … 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
- "residuals" => Dict("v"=>[0.497555, 0.183798, 0.0705184, -0.123959, 0.0872214, 0.311811, 0.273583, 0.800463, -…
- "coefficients" => -0.0500059
+julia> doublesel_effect = rlassoEffect(X, y, d, method = "double selection");
+julia> r_summary(doublesel_effect);
+Estimates and significance testing of the effect of target variables
+ Row Estimate. Std. Error t value Pr(>|t|)
+ 1 -0.05001 0.01579 -3.16719 0.00154 **
+Signif. codes:
+0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We can also use `partialling out` for the orthogonal estimating equations.
-julia> rlassoEffect(X, y, d, method = "partialling out")
-Dict{String, Any} with 9 entries:
- "alpha" => -0.0498115
- "t" => -3.57421
- "se" => 0.0139364
- "coefficients_reg" => [0.0581009, -0.0755655, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 … 0.0, 0.0, 0.0, 0.0, 0.0, 0.0…
- "sample_size" => 90
- "coefficient" => -0.0498115
- "selection_index" => Any[true, false, true, false, true, false, false, false, false, false … false, false, false, …
- "residuals" => Dict("v"=>[0.522248, 0.130278, 0.072321, -0.131969, 0.0984047, 0.357306, 0.294098, 0.797784, -0…
- "coefficients" => -0.0498115
+julia> lasso_effect = rlassoEffect(X, y, d, method = "partialling out")
+julia> r_summary(lasso_effect);
+Estimates and significance testing of the effect of target variables
+ Row Estimate. Std. Error t value Pr(>|t|)
+ 1 -0.04981 0.01394 -3.57317 0.00035 ***
+Signif. codes:
+0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Instrumental Variable Estimation in High-Dimentional Settings
@@ -185,13 +195,16 @@ julia> x = x[:, (mean(x, dims = 1) .> 0.05)'];
julia> z = z[:, (mean(z, dims = 1) .> 0.05)'];
-julia> rlassoIV(x, d, y, z)
-Dict{String, Any} with 5 entries:
- "se" => [0.128507]
- "sample_size" => 312
- "vcov" => [0.0165139;;]
- "residuals" => [-0.20468; 0.0311701; … ; 0.252309; 0.335146;;]
- "coefficients" => [-0.0238347;;]
+julia> lasso_IV_XZ = rlassoIV(x, d, y, z)
+julia> r_summary(lasso_IV_XZ);
+Estimates and Significance Testing of the effect of target variables in the IV regression model
+ coeff. se. t-value p-value
+ d1 -0.02383 0.12851 -0.18543 0.85289
+Signif. codes:
+0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Inference on Treatment Effects on a High-Dimensional Setting
@@ -211,37 +224,141 @@ julia> z = pension[:, "e401"];
julia> X = Matrix(pension[:, ["i2", "i3", "i4", "i5", "i6", "i7", "a2", "a3", "a4", "a5", "fsize", "hs", "smcol", "col", "marr", "twoearn", "db", "pira", "hown"]]);
-julia> rlassoATE(X, d, y)
-Dict{String, Any} with 5 entries:
- "se" => 1930.68
- "individual" => [-30618.3, -57537.6, -71442.9, 21383.3, -2.32925e5, 3.40765e5, 97143.9, -286.995, 21439.9, 99072.0 …
- "sample_size" => 9915
- "te" => 10180.1
- "type" => "ATE"
-julia> rlassoATET(X, d, y)
-Dict{String, Any} with 6 entries:
- "se" => 2944.43
- "individual" => [-21536.4, -52877.2, -1.44867e5, -2739.29, -307741.0, 7.3912e5, 1.73107e5, 12929.3, -2569.57, 62331.…
- "sample_size" => 9915
- "te" => 12628.5
- "type" => "ATET"
-julia> rlassoLATE(X, d, y, z)
-Dict{String, Any} with 5 entries:
- "se" => 2326.9
- "individual" => [-50526.8, -1.39158e5, -1.37102e5, 38508.0, -6.5644e5, 7.94317e5, 2.50222e5, 71721.0, 39272.5, 1.440…
- "sample_size" => 9915
- "te" => 12992.1
- "type" => "LATE"
-julia> rlassoLATET(X, d, y, z)
-Dict{String, Any} with 6 entries:
- "se" => 3645.28
- "individual" => [-35580.5, -90558.0, -1.83628e5, -5303.13, -8.0766e5, 1.88668e6, 4.94743e5, 18436.0, -4847.72, 74008…
- "sample_size" => 9915
- "te" => 15323.2
- "type" => "LATET"
+julia> pension_ate = rlassoATE(X, d, y)
+julia> r_summary(pension_ate);
+ ------
+ Post-Lasso estimation: true
+ Intercept: true
+ Control: 0
+ Total number of variables: 19
+ Number of selected variables: 9
+ ------
+============ ============
+ Variable Estimate
+============ ============
+ Intercept -2.07033
+ V 1 -0.237913
+ V 3 0.618819
+ V 4 0.846136
+ V 5 1.10569
+ V 6 1.34217
+ V 10 -0.33151
+ V 16 0.0382348
+ V 17 0.620232
+ V 18 0.335563
+============ ============
+ Coeff SE t.value
+========== ========= ==========
+ 10180.1 1930.68 5.2728
+========== ========= ==========
+julia> pension_atet = rlassoATET(X, d, y)
+julia> r_summary(pension_atet);
+ ------
+ Post-Lasso estimation: true
+ Intercept: true
+ Control: 0
+ Total number of variables: 19
+ Number of selected variables: 6
+ ------
+============ ============
+ Variable Estimate
+============ ============
+ Intercept -1.79587
+ V 1 -0.608675
+ V 5 0.622942
+ V 6 0.839653
+ V 16 0.199394
+ V 17 0.643286
+ V 18 0.374925
+============ ============
+ Estimation and significance tesing of the treatment effect
+ Type: ATET
+ Bootstrap: none
+ Coeff SE t.value
+========== ========= ==========
+ 12628.5 2944.43 4.28893
+========== ========= ==========
+julia> pension_late = rlassoLATE(X, d, y, z)
+julia> r_summary(pension_late);
+ ------
+ Post-Lasso estimation: true
+ Intercept: true
+ Control: 0
+ Total number of variables: 19
+ Number of selected variables: 10
+ ------
+============ ============
+ Variable Estimate
+============ ============
+ Intercept -1.58403
+ V 1 -0.329602
+ V 3 0.657641
+ V 4 0.836492
+ V 5 1.11528
+ V 6 1.21348
+ V 8 0.142622
+ V 10 -0.299557
+ V 16 0.0516196
+ V 17 1.03219
+ V 18 0.135758
+============ ============
+ Estimation and significance tesing of the treatment effect
+ Type: LATE
+ Bootstrap: none
+========== ======== ==========
+ Coeff SE t.value
+========== ======== ==========
+ 12992.1 2326.9 5.58344
+========== ======== ==========
+julia> pension_latet = rlassoLATET(X, d, y, z)
+julia> r_summary(pension_latet);
+ ------
+ Post-Lasso estimation: true
+ Intercept: true
+ Control: 0
+ Total number of variables: 19
+ Number of selected variables: 5
+ ------
+============ ============
+ Variable Estimate
+============ ============
+ Intercept -1.25636
+ V 1 -0.714199
+ V 5 0.677564
+ V 6 0.794049
+ V 16 0.212127
+ V 17 1.05388
+============ ============
+ Estimation and significance tesing of the treatment effect
+ Type: LATET
+ Bootstrap: none
+========== ========= ==========
+ Coeff SE t.value
+========== ========= ==========
+ 15323.2 3645.28 4.20357
diff --git a/data/3_2.csv b/data/3_2.csv
new file mode 100644
index 0000000..bf8119b
--- /dev/null
+++ b/data/3_2.csv
@@ -0,0 +1,101 @@
diff --git a/data/3_2_2.csv b/data/3_2_2.csv
new file mode 100644
index 0000000..c5a67e2
--- /dev/null
+++ b/data/3_2_2.csv
@@ -0,0 +1,101 @@
diff --git a/data/4_1.csv b/data/4_1.csv
new file mode 100644
index 0000000..d2e9e9d
--- /dev/null
+++ b/data/4_1.csv
@@ -0,0 +1,5001 @@
diff --git a/data/4_2.csv b/data/4_2.csv
new file mode 100644
index 0000000..b74716f
--- /dev/null
+++ b/data/4_2.csv
@@ -0,0 +1,101 @@
diff --git a/data/7_.csv b/data/7_.csv
new file mode 100644
index 0000000..6c0e425
--- /dev/null
+++ b/data/7_.csv
@@ -0,0 +1,101 @@
diff --git a/src/HDMjl.jl b/src/HDMjl.jl
index 172ac51..162489c 100644
--- a/src/HDMjl.jl
+++ b/src/HDMjl.jl
@@ -1,8 +1,8 @@
module HDMjl
-export lambdaCalculation, init_values, as_logical, LassoShooting_fit, rlasso, rlassoEffect, rlassoEffects, rlassoIVselectX, rlassoIVselectZ, rlassoIV, rlassologit, rlassologitEffect, rlassologitEffects, rlassoATE, rlassoATET, rlassoLATE, rlassoLATET, tsls
+export lambdaCalculation, init_values, as_logical, LassoShooting_fit, rlasso, rlassoEffect, rlassoEffects, rlassoIVselectX, rlassoIVselectZ, rlassoIV, rlassologit, rlassologitEffect, rlassologitEffects, rlassoATE, rlassoATET, rlassoLATE, rlassoLATET, tsls, r_summary, r_print, r_confint, r_predict
-using Statistics, GLM, DataFrames, LinearAlgebra, GLMNet, Random
+using Statistics, GLM, DataFrames, LinearAlgebra, GLMNet, Random, PrettyTables, Distributions
diff --git a/src/help_functions.jl b/src/help_functions.jl
index 82bfe15..1df1988 100644
--- a/src/help_functions.jl
+++ b/src/help_functions.jl
@@ -7,6 +7,7 @@ function init_values(x, y; number::Int64 = 5, intercept::Bool = true)
for i in 1:p
append!(corr, abs.(cor(y, x[:, i])))
+ corr = replace(corr, NaN => minimum(filter(!isnan,corr)) -1)
index = sortperm(corr, rev = true)[1 : min(number, p)]
coefficients = zeros(p)
@@ -39,7 +40,7 @@ end
function lambdaCalculation(; homoskedastic::Union{Bool, String} = false, X_dependent_lambda::Bool = false, lambda_start = nothing, c::Float64 = 1.1, gamma::Float64 = 0.1, numSim::Int = 5000, y = nothing, x = nothing)
# homoskedastic and X-independent
- if homoskedastic == true & !X_dependent_lambda
+ if homoskedastic == true && X_dependent_lambda == false
p = size(x, 2)
n = size(x, 1)
lambda0 = 2 * c * sqrt(n) * quantile(Normal(0.0, 1.0), 1 - gamma / (2 * p))
@@ -47,7 +48,7 @@ function lambdaCalculation(; homoskedastic::Union{Bool, String} = false, X_depen
lambda = zeros(p) .+ lambda0 * Ups0
# homoskedastic and X-dependent
- elseif homoskedastic == true & X_dependent_lambda
+ elseif homoskedastic == true && X_dependent_lambda == true
p = size(x, 2)
n = size(x, 1)
R = numSim
@@ -66,7 +67,7 @@ function lambdaCalculation(; homoskedastic::Union{Bool, String} = false, X_depen
lambda = lambda0 * Ups0
# heteroskeddastic and X-independent
- elseif homoskedastic == false & !X_dependent_lambda
+ elseif homoskedastic == false && X_dependent_lambda == false
p = size(x, 2)
n = size(x, 1)
lambda0 = 2 * c * sqrt(n) * quantile(Normal(0.0, 1.0), 1 - gamma / (2 * p))
@@ -74,7 +75,7 @@ function lambdaCalculation(; homoskedastic::Union{Bool, String} = false, X_depen
lambda = lambda0 * Ups0
# heteroskedastic and X-dependent
- elseif homoskedastic == false & X_dependent_lambda
+ elseif homoskedastic == false && X_dependent_lambda ==true
p = size(x, 2)
n = size(x, 1)
R = numSim
diff --git a/src/rlasso.jl b/src/rlasso.jl
index 732d3a6..1889034 100644
--- a/src/rlasso.jl
+++ b/src/rlasso.jl
@@ -1,12 +1,44 @@
+mutable struct r_lasso # NOTE(review): never constructed in the visible diff — confirm this type is still needed
+ result # untyped; presumably the Dict returned by `rlasso` — TODO confirm
+ head_msg # presumably the header text printed by `r_summary` — TODO confirm
+ foot_msg # presumably the footer text printed by `r_summary` — TODO confirm
+ main_tbl # presumably the variable/estimate table (cf. est["main_tbl"]) — TODO confirm
function rlasso(x, y; post = true, intercept = true, model = true,
homoskedastic = false, X_dependent_lambda = false, lambda_start = nothing,
c = 1.1, maxIter = 15, tol::Float64 = 1e-5, n = size(y, 1), gamma = 0.1 / log(n), threshold = nothing)
+ names_col = try
+ names(x)
+ catch
+ nothing
+ end
+ x = Matrix(x[:, :])
+ y = Matrix(y[:, :])
+ n_cl = []
if isnothing(threshold)
threshold = 0
n = size(y, 1)
p = size(x, 2)
+ if isnothing(names_col)
+ for i in 1:p
+ push!(n_cl, "V $i")
+ end
+ else
+ n_cl = names_col
+ end
+ if post == false
+ c = .5
+ end
+ # print(n_cl)
if intercept
meanx = mean(x, dims = 1)
x = x .- meanx
@@ -39,8 +71,8 @@ function rlasso(x, y; post = true, intercept = true, model = true,
coefTemp = LassoShooting_fit(x, y, lambda, XX = XX, Xy = Xy)["coefficients"]
- global coefTemp[isnan.(coefTemp)] .= 0
- global ind1 = abs.(coefTemp) .> 0
+ coefTemp[isnan.(coefTemp)] .= 0
+ ind1 = abs.(coefTemp) .> 0
global x1 = x[:, ind1]
if isempty(x1)
@@ -71,32 +103,32 @@ function rlasso(x, y; post = true, intercept = true, model = true,
global e1 = y - x1 * coefT
coefTemp[ind1] = coefT
elseif !post
- e1 = y - x1 * coefTemp[ind1]
+ global e1 = y - x1 * coefTemp[ind1]
s1 = sqrt(var(e1))
# Homoskedastic and X-independent
- if homoskedastic == true & !X_dependent_lambda
+ if (homoskedastic == true) && !X_dependent_lambda
Ups1 = s1 * Psi
lambda = pen["lambda0"] * Ups1
- # Homoskedastic and X-dependent
- elseif homoskedastic == true & X_dependent_lambda
+ # Homoskedastic and X-dependent
+ elseif (homoskedastic == true) && X_dependent_lambda
Ups1 = s1 * Psi
lambda = pen["lambda0"] * Ups1
- # Heteroskedastic and X-independent
- elseif homoskedastic == false & !X_dependent_lambda
+ # Heteroskedastic and X-independent
+ elseif (homoskedastic == false) && !X_dependent_lambda
Ups1 = 1 / sqrt(n) .* sqrt.(((e1 .^ 2)' * (x .^ 2))')
lambda = Ups1 * pen["lambda0"]
- # Heteroskedastic and X-dependent
- elseif homoskedastic == false & X_dependent_lambda
+ # Heteroskedastic and X-dependent
+ elseif (homoskedastic == false) && X_dependent_lambda
lc = lambdaCalculation(x = x, y = e1, homoskedastic = homoskedastic, X_dependent_lambda = X_dependent_lambda, lambda_start = lambda_start, c = c, gamma = gamma)
Ups1 = lc["Ups0"]
lambda = lc["lambda"]
- # Homoskedastic = "none"
+ # Homoskedastic = "none"
elseif homoskedastic == "none"
if isnothing(lambda_start)
throw(ArgumentError("lambda_start required when homoskedastic is set to none" ))
@@ -116,9 +148,9 @@ function rlasso(x, y; post = true, intercept = true, model = true,
coefTemp = None
ind1 = zeros(p)
- global coefTemp = coefTemp
+ coefTemp = coefTemp
coefTemp[abs.(coefTemp) .< threshold] .= 0
- global ind1 = ind1
+ ind1 = ind1
if intercept
if isnothing(mu)
mu = 0
@@ -135,24 +167,90 @@ function rlasso(x, y; post = true, intercept = true, model = true,
intercept_value = nothing
if intercept
beta = vcat(intercept_value, coefTemp)
+ main_tbl = hcat(vcat(["Intercept"], n_cl), beta)
beta = coefTemp
+ main_tbl = hcat(n_cl, beta)
s1 = sqrt(var(e1))
est = Dict("coefficients" => beta, "beta" => coefTemp, "intercept" => intercept_value, "index" => ind1,
"residuals" => e1, "sigma" => s1, "loadings" => Ups1, "iter" => mm, "lambda0" => lambda0, "lambda" => lambda,
"options" => Dict("post" => post, "intercept" => intercept, "ind_scale" => ind, "mu" => mu, "meanx" => meanx), "model" => model)
- if model
+ if model
x = x .+ meanx
est["model"] = x
est["model"] = nothing
+ est["covariates"] = p
est["tss"] = sum((y .- mean(y)) .^ 2)
est["rss"] = sum(est["residuals"] .^ 2)
est["dev"] = y .- mean(y)
+ est["main_tbl"] = main_tbl
return est
+"""
+    r_summary(rlasso_obj::Dict; all = false)
+
+Print a summary of a fit returned by `rlasso`: a header (post-Lasso flag, number of
+covariates, number of selected variables), a "Variable"/"Estimate" table and the
+multiple and adjusted R-squared.
+
+# Arguments
+- `rlasso_obj`: the Dict returned by `rlasso`. The entries read here are
+  "coefficients", "options", "main_tbl", "covariates", "index", "residuals",
+  "rss" and "tss".
+- `all`: when `true` every row of the stored table is shown; otherwise only the rows
+  whose coefficient is non-zero.
+
+Returns `nothing`; the summary is written to standard output.
+"""
+function r_summary(rlasso_obj::Dict; all = false)
+    coefs = rlasso_obj["coefficients"]
+    # Rows of "main_tbl" line up with "coefficients", so these positions select the
+    # rows with a non-zero estimate.
+    select_cl = [i for i in eachindex(coefs) if coefs[i] != 0]
+    # Read the stored option instead of testing the intercept value: for a fit without
+    # intercept the "intercept" entry is `nothing`, and `nothing != 0` is `true`.
+    intercept = rlasso_obj["options"]["intercept"] ? 1 : 0
+    tbl = all ? rlasso_obj["main_tbl"] : rlasso_obj["main_tbl"][select_cl, :]
+    head_msg = "
+ Post-Lasso Estimation: $(rlasso_obj["options"]["post"])
+ Total number of variables: $(rlasso_obj["covariates"])
+ Number of selected variables: $(sum(rlasso_obj["index"]))
+ ---
+ "
+    # Selected variables: ((tbl[select_cl, 1])')
+    print(head_msg)
+    print(" \n")
+    @ptconf tf = tf_simple alignment = :l
+    @pt :header = ["Variable", "Estimate"] tbl
+    # The residuals are always stored, whereas "model" is `nothing` when the fit was
+    # made with `model = false`; take the sample size from the residuals.
+    n = size(rlasso_obj["residuals"], 1)
+    r_squared = 1 - rlasso_obj["rss"] / rlasso_obj["tss"]
+    r_squared_adj = 1 - (1 - r_squared) * ((n - intercept) / (n - sum(rlasso_obj["index"]) - intercept))
+    foot_msg = "
+ ----
+ Multiple R-squared: $r_squared
+ Adjusted R-squared: $r_squared_adj
+ "
+    print(foot_msg)
+    # print(rlasso_obj.head_msg)
+    # print(rlasso_obj.foot_msg)
+"""
+    r_predict(rlasso::Dict; xnew = rlasso["model"])
+
+Return the fitted values for `xnew` from a fit returned by `rlasso`.
+
+# Arguments
+- `rlasso`: the Dict returned by `rlasso`; "coefficients" and the "intercept" flag
+  under "options" are read.
+- `xnew`: matrix of covariates, one row per observation. Defaults to the design
+  matrix stored in the fit.
+
+# Throws
+- `ArgumentError` when `xnew` is `nothing` (the fit stored no model and none was given).
+"""
+function r_predict(rlasso::Dict; xnew = rlasso["model"])
+    if isnothing(xnew)
+        throw(ArgumentError("no design matrix available: fit with model = true or pass xnew"))
+    end
+    n, p = size(xnew)
+    # Decide from the stored option rather than the intercept value: without an
+    # intercept the "intercept" entry is `nothing`, and `nothing != 0` is `true`,
+    # which would prepend a ones column that "coefficients" has no entry for.
+    if rlasso["options"]["intercept"]
+        y_hat = hcat(ones(n), xnew) * rlasso["coefficients"]
+    else
+        y_hat = xnew * rlasso["coefficients"]
+    end
+    return y_hat
\ No newline at end of file
diff --git a/src/rlassoEffect.jl b/src/rlassoEffect.jl
index 798254e..90dd6a7 100644
--- a/src/rlassoEffect.jl
+++ b/src/rlassoEffect.jl
@@ -1,165 +1,382 @@
+mutable struct rlassoEffect1 # result wrapper returned by `rlassoEffect`; dispatch target of `r_summary` / `r_print`
+ se # results["se"]: standard error of the estimate
+ sample_size # results["sample_size"]: the `n` taken from `size(x)`
+ coefficients # results["coefficient"]: the scalar estimate `alpha`
+ dict # the complete `results` Dict (also holds "t", "residuals", "selection_index", ...)
function rlassoEffect(
- x,
- y,
- d;
- method = "double selection",
- I3 = nothing,
- post = true
+ x,
+ y,
+ d;
+ method = "double selection",
+ I3 = nothing,
+ post = true
+#= x = Matrix(x)
+y = Matrix(y[:, :])
+d = Matrix(d[:, :]) =#
+n, p = size(x)
+# i3 = Set(I3)
+if method == "double selection"
+ I1 = rlasso(x, d, post = post)["index"]
+ I2 = rlasso(x, y, post = post)["index"]
+ if !isnothing(I3)
+ I = I1 + I2 + I3
+ else
+ I = I1 + I2
+ end
+ if sum(I) == 0
+ I = nothing
+ end
+ I[I .> 0] .= 1
+ I = BitVector(I)
+ x = hcat(ones(n), d, x[:, I])
+ reg1 = GLM.lm(x, y)
+ alpha = GLM.coef(reg1)[2]
+ xi = GLM.residuals(reg1) .* sqrt(n / (n - sum(I) - 1))
+ if isnothing(I)
+ reg2 = GLM.lm(ones(n, 1), d)
+ else
+ reg2 = GLM.lm(x[:, Not(2)], d)
+ end
+ v = GLM.residuals(reg2)
+ var_r = 1 / n * 1 /mean(v.^2) * mean(v.^2 .* xi.^2) * 1 / mean(v.^2)
+ se = sqrt(var_r)
+ tval = alpha / se
+ # pval = 2 ##3 searching function (`pnorm`)
+ if isnothing(I)
+ no_select = 1
+ else
+ no_select = 0
+ end
+ res = Dict(
+ "epsilon" => xi, "v" => v
- #= x = Matrix(x)
- y = Matrix(y[:, :])
- d = Matrix(d[:, :]) =#
+ results = Dict(
+ "alpha" => alpha, "se" => se, "t" => tval,
+ "no_select" => no_select, "coefficients" => alpha, "coefficient" => alpha,
+ "coefficients_reg" => GLM.coef(reg1), "selection_index" => I, "residuals" => res,
+ "sample_size" => n
+ )
+elseif method == "partialling out"
+ reg1 = rlasso(x, y, post = post)
+ yr = reg1["residuals"]
+ reg2 = rlasso(x, d, post = post)
+ dr = reg2["residuals"]
- n, p = size(x)
+ data0 = hcat(yr, ones(n), dr)
+ reg3 = GLM.lm(data0[:, Not(1)], data0[:, 1])
+ alpha = GLM.coef(reg3)[2]
- # i3 = Set(I3)
+ var = vcov(reg3)[2, 2]
+ se = sqrt.(var)
+ tval = alpha ./ sqrt(var)
+ # pval =
+ res = Dict("epsilon" => GLM.residuals(reg3), "v" => dr)
- if method == "double selection"
- I1 = rlasso(x, d, post = post)["index"]
- I2 = rlasso(x, y, post = post)["index"]
+ I1 = reg1["index"]
+ I2 = reg2["index"]
+ I = as_logical(I1 + I2)
+ results = Dict(
+ "alpha" => alpha, "se" => se, "t" => tval, "coefficients" => alpha,
+ "coefficient" => alpha, "coefficients_reg" => reg1["coefficients"], "selection_index" => I,
+ "residuals" => res, "sample_size" => n
+ )
- if !isnothing(I3)
- I = I1 + I2 + I3
- else
- I = I1 + I2
- end
+se = results["se"]
+sample_size = results["sample_size"]
+coefficient = results["coefficient"]
+return rlassoEffect1(se, sample_size, coefficient, results)
- if sum(I) == 0
- I = nothing
- end
- I[I .> 0] .= 1
- I = BitVector(I)
- x = hcat(ones(n), d, x[:, I])
- reg1 = GLM.lm(x, y)
- alpha = GLM.coef(reg1)[2]
- xi = GLM.residuals(reg1) .* sqrt(n / (n - sum(I) - 1))
- if isnothing(I)
- reg2 = GLM.lm(ones(n, 1), d)
- else
- reg2 = GLM.lm(x[:, Not(2)], d)
- end
- v = GLM.residuals(reg2)
- var = 1 / n * 1 /mean(v.^2) * mean(v.^2 .* xi.^2) * 1 / mean(v.^2)
- se = sqrt(var)
- tval = alpha / se
- # pval = 2 ##3 searching function (`pnorm`)
- if isnothing(I)
- no_select = 1
- else
- no_select = 0
- end
- res = Dict(
- "epsilon" => xi, "v" => v
- )
- results = Dict(
- "alpha" => alpha, "se" => se, "t" => tval,
- "no_select" => no_select, "coefficients" => alpha, "coefficient" => alpha,
- "coefficients_reg" => GLM.coef(reg1), "selection_index" => I, "residuals" => res,
- "sample_size" => n
- )
- elseif method == "partialling out"
- reg1 = rlasso(x, y, post = post)
- yr = reg1["residuals"]
- reg2 = rlasso(x, d, post = post)
- dr = reg2["residuals"]
- data0 = hcat(yr, ones(n), dr)
- reg3 = GLM.lm(data0[:, Not(1)], data0[:, 1])
- alpha = GLM.coef(reg3)[2]
- var = vcov(reg3)[2, 2]
- se = sqrt.(var)
- tval = alpha ./ sqrt(var)
- # pval =
- res = Dict("epsilon" => GLM.residuals(reg3), "v" => dr)
- I1 = reg1["index"]
- I2 = reg2["index"]
- I = as_logical(I1 + I2)
- results = Dict(
- "alpha" => alpha, "se" => se, "t" => tval, "coefficients" => alpha,
- "coefficient" => alpha, "coefficients_reg" => reg1["coefficients"], "selection_index" => I,
- "residuals" => res, "sample_size" => n
- )
+"""
+    r_summary(object::rlassoEffect1)
+
+Print a table with estimate, standard error, t value and p-value (followed by a
+significance marker) for a result of `rlassoEffect`. When `object.coefficients` is
+empty, "No coefficients" is printed instead.
+
+All columns are rounded to 5 digits. The t value is the ratio of the rounded estimate
+and the rounded standard error; the p-value is the two-sided standard-normal tail
+probability of that t value. Returns `nothing`.
+"""
+function r_summary(object::rlassoEffect1)
+    if length(object.coefficients) != 0
+        k = length(object.coefficients)
+        table = zeros(k, 4)
+        table[:, 1] .= round.(object.coefficients, digits = 5)
+        table[:, 2] .= round.(object.se, digits = 5)
+        table[:, 3] .= round.(table[:, 1] ./ table[:, 2], digits = 5)
+        # Broadcast `cdf` element-wise instead of passing the whole vector to it.
+        table[:, 4] .= round.(2 .* cdf.(Normal(), -abs.(table[:, 3])), digits = 5)
+
+        # Significance marker for one p-value. The last bracket includes p == 1
+        # (t == 0), which previously matched no bracket and was shown as "0.0";
+        # a NaN p-value gets no marker.
+        function signif_mark(p)
+            isnan(p) && return ""
+            p < 0.001 && return " ***"
+            p < 0.01 && return " **"
+            p < 0.05 && return " *"
+            p < 0.1 && return " ."
+            return " "
+        end
+
+        table2 = string.(table)
+        table2[:, 4] .= table2[:, 4] .* signif_mark.(table[:, 4])
+        table2 = DataFrame(table2, :auto)
+        table2 = rename(table2, ["coeff.", "se.", "t-value", "p-value"])
+        print("""Estimates and significance testing of the effect of target variables""",
+              "\n")
+        pretty_table(table2, show_row_number = true, header = ["Estimate.", "Std. Error", "t value", "Pr(>|t|)"], tf = tf_borderless)
+        print("---", "\n", "Signif. codes:","\n", "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")
+        print("\n")
+    else
+        print("No coefficients\n")
- return results
+"""
+    r_print(object::rlassoEffect1; digits = 3)
+
+Print the coefficients of a result of `rlassoEffect`, rounded to `digits` decimals,
+as borderless tables of at most 10 columns each. The column headers are the
+coefficient positions 1, 2, .... Prints "No coefficients" when there are none.
+"""
+function r_print(object::rlassoEffect1; digits = 3)
+    if length(object.coefficients) != 0
+        k = length(object.coefficients)
+        labels = reshape(["$y" for y = 1:k], (1, k))
+        a = vcat(labels, round.(object.coefficients', digits = digits))
+        # Printed once for every table size (it used to be skipped for more than
+        # 10 coefficients).
+        println("Coefficients:\n")
+        # Integer column ranges: the former `10*trunc(k/10)+1` start was a Float64,
+        # which is not a valid array index, so the last partial chunk failed.
+        for first_col in 1:10:k
+            cols = first_col:min(first_col + 9, k)
+            pretty_table(a[2, cols]', tf = tf_borderless, header = a[1, cols])
+        end
+    else
+        print("No coefficients\n")
+    end
+mutable struct rlassoEffects1 # result wrapper returned by `rlassoEffects`; dispatch target of `r_print` / `r_summary` / `r_confint`
+ se # res["se"]: one standard error per target variable
+ sample_size # res["sample_size"]: number of rows of `x`
+ index # res["index"]: the `index` argument passed to `rlassoEffects`
+ coefficients # res["coefficients"]: one estimate per target variable
+ dict # the complete `res` Dict (also holds "t", "lasso_reg", "residuals", "coef_mat", "selection_matrix")
-function rlassoEffects(x, y; index = 1:size(x, 2), I3 = nothing)
+function rlassoEffects(x, y; index = 1:size(x, 2), I3 = nothing, method = "partialling out", post = true)
- x = Matrix(x)
- y = Matrix(y[:, :])
+ # if method ∉ ["partialling out", "double selection"]
+ # print("Method not found, select from [partialling out, double selection")
+ # end
- if Set(index) > 2
- k = p = length(index)
- # all(all())
+ if length(Set(index)) == 2
+ k = p1 = sum(index)
- k = p = sum(index)
+ k = p1 = length(index)
+ end
+ n, p = size(x)
+ names_x = try names(x)
+ catch
+ nothing
+ end
+ x0 = Matrix(x[:, :])
+ # y0 = Matrix(y[:, 1])
+ if isnothing(names_x)
+ names_x = []
+ for i in 1:p
+ push!(names_x, "V $i")
+ end
- n, p0 = size(x, 1)
coefficients = zeros(k)
se = zeros(k)
t = zeros(k)
- lasso_reg = Dict()
+ pval = zeros(k)
+ lasso_reg = Dict()
reside = zeros(n, p1)
residv = zeros(n, p1)
- selection_matrix = zeros(p0, k)
coef_mat = Dict()
+ selection_matrix = zeros(n, k)
+# names(coefficients) <- names(se) <- names(t) <- names(pval) <- names(lasso.regs) <- colnames(reside) <- colnames(residv) <- colnames(selection.matrix) <- colnames(x)[index]
for i in 1:k
- d = x[:, index[i]]
- xt = x[:, Not(index[i])]
+ d = x0[:, index[i]]
+ xt = x0[:, Not(index[i])]
+ # Variables de control
if isnothing(I3)
I3m = I3
I3m = I3[Not(index[i])]
- lasso_reg[i] = try
- rlassoEffect(xt, y, d, method = method, I3 = I3m, post = post)
- catch
- "try-error"
- end
- if lasso_regs[i] == "try-error"
+ lasso_reg[i] = try
+ rlassoEffect(xt, y, d, method = method, I3 = I3m, post = post)
+ catch
+ "try-error"
+ end
+ if lasso_reg[i] == "try-error"
- coefficients[i] = lasso_regs[i]["alpha"]
- se[i] = lasso_regs[i]["se"]
- t[i] = lasso_regs[i]["t"]
- coef_mat[i] = lasso_reg["coefficients_reg"]
- reside[:, i] = lasso_regs[i]["residuals"]["epsilon"]
- residv[:, i] = lasso_regs[i]["residuals"]["v"]
- selection_matrix[Not(index[i]), i] = lasso_reg["selection_index"]
+ coefficients[i] = lasso_reg[i].dict["alpha"]
+ se[i] = lasso_reg[i].dict["se"]
+ t[i] = lasso_reg[i].dict["t"]
+ # pval[i] = lasso_reg[i]["p_value"]
+ reside[:, i] = lasso_reg[i].dict["residuals"]["epsilon"]
+ residv[:, i] = lasso_reg[i].dict["residuals"]["v"]
+ coef_mat[i] = lasso_reg[i].dict["coefficients_reg"]
+ # selection_matrix[Not(index[i]), i] = lasso_reg[i]["selection_index"]
residuals = Dict("e" => reside, "v" => residv)
res = Dict(
"coefficients" => coefficients, "se" => se, "t" => t,
- "lasso_regs" => lasso_reg, "index" => index, "sample_size" => n,
+ "lasso_reg" => lasso_reg, "index" => index, "sample_size" => n,
"residuals" => residuals, "coef_mat" => coef_mat, "selection_matrix" => selection_matrix
- return res
+ se = res["se"]
+ sample_size = res["sample_size"]
+ index = res["index"]
+ coefficients = res["coefficients"]
+ return rlassoEffects1(se, sample_size, index, coefficients, res)
+"""
+    r_print(object::rlassoEffects1; digits = 3)
+
+Print the coefficients of a result of `rlassoEffects`, rounded to `digits` decimals,
+as centred borderless tables of at most 10 columns each. Column headers are "X"
+followed by the corresponding entry of `object.index`. Prints "No coefficients"
+when there are none.
+"""
+function r_print(object::rlassoEffects1; digits = 3)
+    if length(object.coefficients) != 0
+        k = length(object.coefficients)
+        labels = ["X$y" for y = object.index]
+        labels = reshape(labels, (1, length(labels)))
+        a = vcat(labels, round.(object.coefficients', digits = digits))
+        # Printed once for every table size (it used to be skipped for more than
+        # 10 coefficients).
+        println("Coefficients:\n")
+        # Integer column ranges: the former `10*trunc(k/10)+1` start was a Float64,
+        # which is not a valid array index, so the last partial chunk failed.
+        for first_col in 1:10:k
+            cols = first_col:min(first_col + 9, k)
+            pretty_table(a[2, cols]', tf = tf_borderless, header = a[1, cols], nosubheader = true, equal_columns_width = true, columns_width = 9, alignment=:c)
+            # As before, a blank line follows every full chunk of a multi-chunk
+            # listing, but not a single table or the trailing partial chunk.
+            if k > 10 && length(cols) == 10
+                print("\n")
+            end
+        end
+    else
+        print("No coefficients\n")
+    end
+"""
+    r_summary(object::rlassoEffects1)
+
+Print a table with estimate, standard error, t value and p-value (followed by a
+significance marker) for every target variable of a result of `rlassoEffects`, and
+return the numeric table as a `DataFrame` with columns "index", "Estimate.",
+"Std. Error", "t value" and "Pr(>|t|)".
+
+All columns are rounded to 5 digits. The t value is the ratio of the rounded estimate
+and the rounded standard error; the p-value is the two-sided standard-normal tail
+probability of that t value. When `object.coefficients` is empty, "No coefficients"
+is printed and `nothing` is returned.
+"""
+function r_summary(object::rlassoEffects1)
+    if length(object.coefficients) != 0
+        k = length(object.coefficients)
+        row_labels = ["X$y" for y = object.index]
+        table = zeros(k, 4)
+        table[:, 1] .= round.(object.coefficients, digits = 5)
+        table[:, 2] .= round.(object.se, digits = 5)
+        table[:, 3] .= round.(table[:, 1] ./ table[:, 2], digits = 5)
+        # Broadcast `cdf` element-wise instead of passing the whole vector to it.
+        table[:, 4] .= round.(2 .* cdf.(Normal(), -abs.(table[:, 3])), digits = 5)
+        table1 = DataFrame(hcat(row_labels, table), :auto)
+        table1 = rename(table1, ["index", "Estimate.", "Std. Error", "t value", "Pr(>|t|)"])
+
+        # Significance marker for one p-value. The last bracket includes p == 1
+        # (t == 0), which previously matched no bracket and was shown as "0.0";
+        # a NaN p-value gets no marker.
+        function signif_mark(p)
+            isnan(p) && return ""
+            p < 0.001 && return " ***"
+            p < 0.01 && return " **"
+            p < 0.05 && return " *"
+            p < 0.1 && return " ."
+            return " "
+        end
+
+        table2 = string.(table)
+        table2[:, 4] .= table2[:, 4] .* signif_mark.(table[:, 4])
+        table2 = DataFrame(hcat(object.coefficients[:,1], table2), :auto)
+        table2 = rename(table2, [" ", "coeff.", "se.", "t-value", "p-value"])
+        print("Estimates and significance testing of the effect of target variables",
+              "\n")
+        pretty_table(table2, show_row_number = false, header = [" ","Estimate.", "Std. Error", "t value", "Pr(>|t|)"], tf = tf_borderless, row_names = row_labels)
+        print("---", "\n", "Signif. codes:","\n", "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")
+        print("\n")
+        return table1
+    else
+        print("No coefficients\n")
+    end
+function r_confint(object::rlassoEffects1, level = 0.95; joint = false) # confidence intervals for the coefficients in `object`; prints them and returns them as a DataFrame
+ n = object.sample_size
+ k = length(object.coefficients)
+ cf = object.coefficients
+ #pnames <- names(cf)
+ # if (missing(parm))
+ # parm <- pnames else if (is.numeric(parm))
+ # parm <- pnames[parm]
+ if !joint # pointwise intervals: estimate plus standard error times a standard-normal quantile
+ a = (1 - level)/2
+ a = [a, 1 - a] # lower and upper tail probabilities
+ fac = quantile.(Normal(), a)
+ pct = string.(round.(a; digits = 3)*100, "%") # column labels built from the tail probabilities
+ ses = object.se
+ c_i = []
+ for i in 1:length(cf) # stack one 1x2 row [lower upper] per coefficient
+ if i == 1
+ c_i = (cf[i] .+ ses[i] .* fac)[:,:]'
+ else
+ c_i = vcat(c_i, (cf[i] .+ ses[i] * fac)[:,:]')
+ end
+ end
+ table1 = DataFrame(hcat(["X$y" for y = object.index], c_i), :auto)
+ table1 = rename(table1, vcat("index", pct))
+ #ci = NamedArray(c_i, (1:size(c_i)[1], pct))
+ ci = pretty_table(c_i; header = pct, show_row_number = false, tf = tf_borderless, row_names = ["X$y" for y = object.index]) # called for its printed output; `ci` is not used afterwards
+ return table1
+ end
+ if joint # simultaneous intervals: critical value simulated from multivariate-normal draws
+ B = 500 # number of simulation draws
+ e = object.dict["residuals"]["e"]
+ v = object.dict["residuals"]["v"]
+ ev = e .* v
+ Ev2 = mean(v.^2, dims = 1)
+ Omegahat = zeros(k, k);
+ for j in 1:k
+ for l in 1:k
+ Omegahat[j,l] = Omegahat[l,j] = 1/(Ev2[j]*Ev2[l]) .* mean(ev[:,j].*ev[:,l])
+ end
+ end
+ var = diag(Omegahat)
+ sim = zeros(B);;
+ for i in 1:B # each draw records the largest absolute component after scaling by sqrt.(var)
+ beta_i = rand(MvNormal(zeros(k), Omegahat./n));
+ sim[i] = maximum(abs.(beta_i ./ sqrt.(var)))
+ end
+ a = (1 - level) #not dividing by 2!
+ ab = [a/2, 1 - a/2] # used only to build the column labels
+ pct = string.(round.(ab; digits = 3)*100, "%");
+ c_i = zeros(length(cf), 2)
+ hatc = quantile(sim, 1-a) # empirical (1 - a) quantile of the simulated maxima
+ c_i[:, 1] = cf .- hatc .* sqrt.(var)
+ c_i[:, 2] = cf .+ hatc .* sqrt.(var)
+ table1 = DataFrame(hcat(["X$y" for y = object.index], c_i), :auto)
+ table1 = rename(table1, vcat("index", pct))
+ ci = pretty_table(c_i; header = pct, show_row_number = false, tf = tf_borderless, row_names = ["X$y" for y = object.index]) # called for its printed output; `ci` is not used afterwards
+ return table1
+ end
\ No newline at end of file
diff --git a/src/rlassoIV.jl b/src/rlassoIV.jl
index 71be083..bd1ec66 100644
--- a/src/rlassoIV.jl
+++ b/src/rlassoIV.jl
@@ -1,17 +1,26 @@
+# Result container returned by `rlassoIV`.
+mutable struct rlassoIV1
+ # Standard errors, taken from the "se" entry of the estimation result.
+ se
+ # Number of observations, taken from the "sample_size" entry.
+ sample_size
+ #vcov
+ # Coefficient table, taken from the "coefficients" entry.
+ coefficients
+ # The full result dictionary the fields above were read from.
+ dict
function rlassoIV(x, d, y, z; select_Z::Bool = true, select_X::Bool = true, post::Bool = true)
- if !select_Z & !select_X
- res = tsls(d, y, z, x, homoskedastic = false)
- return res
+ if select_Z == false && select_X == false
+ res = tsls(d, y, z, x, homoscedastic = false)
+ #res["coefficients"] = hcat(["d$y" for y = 1:size(d[:,:],2)], res["coefficients"])
+ se = res["se"]
- elseif select_Z & !select_X
+ elseif select_Z == true && select_X == false
res = rlassoIVselectZ(x, d, y, z, post = post)
- return res
+ res["sample_size"] = size(x)[1]
- elseif !select_Z & select_X
+ elseif select_Z == false && select_X == true
res = rlassoIVselectX(x, d, y, z, post = post)
- return res
+ #res["sample_size"] = size(x)[1]
- elseif select_Z & select_X
+ elseif select_Z == true && select_X == true
Z = hcat(z, x)
lasso_d_zx = rlasso(Z, d, post = post)
@@ -45,6 +54,105 @@ function rlassoIV(x, d, y, z; select_Z::Bool = true, select_X::Bool = true, post
res = tsls(Dr, Yr, Zr, intercept = false, homoscedastic = false)
- return res
+ #res["coefficients"] = hcat(["d$y" for y = 1:size(d[:,:],2)], res["coefficients"])
+ se = res["se"]
+ sample_size = res["sample_size"]
+ #vcov = res["vcov"]
+ coefficients = res["coefficients"]
+ res1 = rlassoIV1(se, sample_size, coefficients, res);
+ return res1;
+#using Crayons
+function r_print(object::rlassoIV1, n_digits = 3)
+    # Print the coefficients of an `rlassoIV1` object as borderless tables with at
+    # most 10 coefficients per table.
+    #
+    # `object.coefficients` is read as a two-column matrix: column 1 holds the
+    # labels (used as table headers) and column 2 the estimates, which are rounded
+    # to `n_digits` decimals. Prints "No coefficients" when the matrix has no rows.
+    #
+    # Fixes over the previous version:
+    #   * the emptiness test wrapped the matrix in a one-element vector and was
+    #     therefore always true;
+    #   * the "multiple of 10" test inspected the last digit of `length(...)`
+    #     (rows * 2 columns), so e.g. 15 rows were treated as a multiple of 10 and
+    #     the last 5 coefficients were never printed.
+    n_coef = size(object.coefficients, 1)
+    if n_coef != 0
+        a = hcat(object.coefficients[:,1], round.(object.coefficients[:,2], digits = n_digits))
+        if n_coef <= 10
+            println("Coefficients:\n")
+            pretty_table(a[:, 2]', tf = tf_borderless, header = a[:, 1], nosubheader = true, equal_columns_width = true, columns_width = 10, alignment=:c)
+        else
+            # Walk the rows in consecutive chunks of 10; the last chunk may be shorter.
+            for rows in Iterators.partition(1:n_coef, 10)
+                pretty_table(a[rows, 2]', tf = tf_borderless, header = a[rows, 1], nosubheader = true, equal_columns_width = true, columns_width = 10, alignment=:c)
+                # As before, only full chunks are followed by a blank line.
+                if length(rows) == 10
+                    print("\n")
+                end
+            end
+        end
+    else
+        print("No coefficients\n")
+    end
+#using Distributions
+function r_summary(object::rlassoIV1)
+    # Print a significance table (estimate, standard error, t-value, p-value with
+    # significance stars) for an `rlassoIV1` object and return the numeric table
+    # as a DataFrame. Prints "No coefficients" and returns `nothing` when the
+    # coefficient matrix has no rows.
+    #
+    # `object.coefficients` is read as a two-column matrix (labels, estimates).
+    #
+    # Fixes over the previous version:
+    #   * the emptiness test wrapped the matrix in a one-element vector and was
+    #     therefore always true;
+    #   * `cdf` is now broadcast over the t-values, matching `quantile.(Normal(), a)`
+    #     used by the confidence-interval code;
+    #   * a p-value of exactly 1 (or NaN) fell outside every star bucket and was
+    #     printed as "0.0"; it now keeps its own value.
+    k = size(object.coefficients, 1)
+    if k != 0
+        col_names = [" ", "coeff.", "se.", "t-value", "p-value"]
+        table = zeros(k, 4)
+        table[:, 1] .= round.(vec(object.coefficients[:,2]), digits = 5)
+        table[:, 2] .= round.(object.se, digits = 5)
+        table[:, 3] .= round.(table[:, 1]./table[:, 2], digits = 5)
+        # Two-sided p-value from the standard normal distribution.
+        table[:, 4] .= round.(2 .* cdf.(Normal(), -abs.(table[:, 3])), digits = 5)
+        table1 = DataFrame(hcat(object.coefficients[:,1], table), :auto)
+        table1 = rename(table1, col_names)
+        # Printed copy: same numbers as strings, p-values suffixed with their code.
+        table2 = string.(copy(table))
+        for i in 1:k
+            p = table[i, 4]
+            if isnan(p)
+                continue  # leave "NaN" unmarked
+            elseif p < 0.001
+                table2[i, 4] *= " ***"
+            elseif p < 0.01
+                table2[i, 4] *= " **"
+            elseif p < 0.05
+                table2[i, 4] *= " *"
+            elseif p < 0.1
+                table2[i, 4] *= " ."
+            else
+                table2[i, 4] *= " "
+            end
+        end
+        table2 = DataFrame(hcat(object.coefficients[:,1], table2), :auto)
+        table2 = rename(table2, col_names)
+        print("Estimates and Significance Testing of the effect of target variables in the IV regression model",
+        "\n")
+        pretty_table(table2, show_row_number = false, header = col_names, tf = tf_borderless)
+        print("---", "\n", "Signif. codes:","\n", "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1")
+        print("\n")
+        return table1;
+    else
+        print("No coefficients\n")
+    end
+function r_confint(object::rlassoIV1, level = 0.95)
+    # Normal-approximation confidence intervals for the coefficients of an
+    # `rlassoIV1` object. Column 1 of `object.coefficients` supplies the labels,
+    # the remaining column(s) the estimates; `object.se` supplies the standard
+    # errors. The table is printed with `pretty_table` and returned as a DataFrame.
+    estimates = object.coefficients[:,2:end]
+    tail = (1 - level)/2
+    probs = [tail, 1 - tail]
+    z = quantile.(Normal(), probs)
+    labels = string.(round.(probs; digits = 3)*100, "%")
+    std_errs = object.se
+    bounds = []
+    # Stack one (lower, upper) row per coefficient.
+    for i in 1:length(estimates)
+        row = (estimates[i] .+ std_errs[i] .* z)[:,:]'
+        bounds = i == 1 ? row : vcat(bounds, row)
+    end
+    table1 = DataFrame(hcat(object.coefficients[:,1], bounds), :auto)
+    table1 = rename(table1, append!([" "], labels))
+    pretty_table(table1; header = append!([" "], labels), show_row_number = false, tf = tf_borderless)
+    return table1;
\ No newline at end of file
diff --git a/src/rlassoIVselectX.jl b/src/rlassoIVselectX.jl
index e855fe1..d14d83c 100644
--- a/src/rlassoIVselectX.jl
+++ b/src/rlassoIVselectX.jl
@@ -12,11 +12,11 @@ function rlassoIVselectX(x, d, y, z; post::Bool = true)
Zr[:, i] = lasso_z_x["residuals"]
- result = tsls(Dr, Yr, Zr, intercept = false)
+ result = tsls(Dr, Yr, Zr, nothing,intercept = false)
se = result["se"]
vcov = result["vcov"]
coef = result["coefficients"]
# coefnames = result["coefnames"]
- res = Dict("coefficients" => coef, "vcov" => vcov, "se" => se)
+ res = Dict("coefficients" => coef, "vcov" => vcov, "se" => se, "sample_size" => n)
return res
\ No newline at end of file
diff --git a/src/rlassoIVselectZ.jl b/src/rlassoIVselectZ.jl
index 8fd2e00..fb62a7d 100644
--- a/src/rlassoIVselectZ.jl
+++ b/src/rlassoIVselectZ.jl
@@ -1,9 +1,27 @@
-function rlassoIVselectZ(x, y, d, z; post::Bool = true, intercept::Bool = true)
+function ginv(X, tol = sqrt(eps(Float64)))
+    # Generalized inverse of `X` computed from its singular value decomposition.
+    # Singular values not larger than `tol` times the first singular value are
+    # treated as zero and dropped; if none remain the result is a zero matrix of
+    # the transposed shape.
+    F = svd(X)
+    keep = F.S .> maximum(tol * F.S[1])
+    if all(keep)
+        return F.V * ((1 ./ F.S) .* F.U')
+    elseif !any(keep)
+        return zeros(size(X)[2], size(X)[1])
+    end
+    # Mixed case: rebuild the inverse from the retained singular triplets only.
+    V_kept = F.V[:, keep]
+    Ut_kept = F.U[:, keep]'
+    return V_kept * ((1 ./ F.S[keep]) .* Ut_kept)
+function rlassoIVselectZ(x, d, y, z; post::Bool = true, intercept::Bool = true)
n = size(y, 1)
kex = size(x, 2)
ke = size(d, 2)
+ d_names = ["d$y" for y = 1:ke]
+ x_names = ["x$y" for y = 1:kex];
+ coef_names = append!(d_names,x_names);
Z = hcat(z, x)
kiv = size(Z, 2)
select_mat = zeros(0)
@@ -40,18 +58,19 @@ function rlassoIVselectZ(x, y, d, z; post::Bool = true, intercept::Bool = true)
if isempty(Dhat)
Dhat = append!(Dhat, dihat)
- Dhat = hcat(Dhat, digat)
+ Dhat = hcat(Dhat, dihat)
Dhat = hcat(Dhat, x)
d = hcat(d, x)
- alpha_hat = pinv(Dhat' * d) * (Dhat' * y)
+ alpha_hat = ginv(Dhat' * d) * (Dhat' * y)
residuals = y - d * alpha_hat
Omega_hat = Dhat' * (Dhat .* (residuals .^ 2))
- Q_hat_inv = pinv(d' * Dhat)
+ Q_hat_inv = ginv(d' * Dhat)
vcov = Q_hat_inv * Omega_hat * Q_hat_inv'
+ alpha_hat = hcat(coef_names, alpha_hat)
res = Dict("coefficients" => alpha_hat[1:ke, :], "se" => sqrt.(diag(vcov))[1:ke], "residuals" => residuals, "samplesize" => n)
return res
\ No newline at end of file
diff --git a/src/rlassologit.jl b/src/rlassologit.jl
index 455b8fb..3ec8f97 100644
--- a/src/rlassologit.jl
+++ b/src/rlassologit.jl
@@ -1,8 +1,22 @@
+# Result container returned by the `rlassologit` estimation function.
+mutable struct rlassologit
+ # Dictionary of estimation outputs (coefficients, residuals, options, ...).
+ result::Dict
+ # Header text summarising the fit, printed before the coefficient table.
+ head_msg
+ # Row indices of the coefficient table that are shown by default.
+ select
+ # Two-column table of variable names and coefficient estimates.
+ table
function rlassologit(x, y; model::Bool = true, c::Float64 = 1.1, post::Bool = true,
n::Int64 = size(x, 1), gamma::Float64 = 0.1 / log(n),
lambda::Any = nothing, intercept::Bool = true,
threshold::Any = nothing)
+ x_0 = copy(x)
+ y_0 = copy(y)
+ x = Matrix(x[:, :])
+ y = y[:, 1]
n = size(x, 1)
p = size(x, 2)
if !isnothing(c)
@@ -86,16 +100,97 @@ function rlassologit(x, y; model::Bool = true, c::Float64 = 1.1, post::Bool = tr
a0 = 0
coefs = coefTemp
+ ### === Output print
+ if intercept names_columns = ["Intercept"] else names_columns = [] end
+ select_columns = []
+ # print(names(x_0))
+ if typeof(x_0) == DataFrame
+ names_columns = vcat(names_columns, names(x_0))
+ # print(names_columns)
+ else
+ for i in 1:p
+ vl = "V $i"
+ push!(names_columns, vl)
+ end
+ # print(names_columns)
+ end
+ for i in 1:p
+ if coefs[i] != 0
+ push!(select_columns, i)
+ end
+ end
+ head1 = "
+ ------
+ Post-Lasso estimation: $post
+ Intercept: $intercept
+ Control: $threshold
+ Total number of variables: $p
+ Number of selected variables: $(length(select_columns) - intercept)
+ ------
+ "
+ println(head1)
+ println(" ")
+ table_lgt = hcat(names_columns, coefs)
+ @ptconf tf = tf_simple alignment = :l
+ header = ["Variable", "Estimate"]
+ @pt :header = header table_lgt[select_columns, :]
+ print("rlassologit")
+ # print("head")
+ ###
est = Dict("coefficients" => coefs, "beta" => coefTemp, "intercept" => a0, "index" => ind1,
- "lambda0" => lambda0, "residuals" => e1, "sigma" => sqrt(var(e1)),
- "options" => Dict("post" => post, "intercept" => intercept, "control" => threshold))
- return est
+ "lambda0" => lambda0, "residuals" => e1, "sigma" => sqrt(var(e1)),
+ "options" => Dict("post" => post, "intercept" => intercept, "control" => threshold));
+ return rlassologit(est, head1, select_columns, table_lgt);
+function r_summary(result::rlassologit, all::Bool = false)
+    # Print the stored header message followed by the coefficient table of an
+    # `rlassologit` result. With `all = true` every row of `result.table` is
+    # shown; otherwise only the rows listed in `result.select`.
+    println(result.head_msg)
+    @ptconf tf = tf_simple alignment = :l
+    table_print = all ? result.table : result.table[result.select, :]
+    @pt :header = ["Variable", "Estimate"] table_print
+# Result container returned by the `rlassologitEffect` estimation function.
+mutable struct rlassologitEffect
+ # Dictionary of estimation outputs.
+ result::Dict
+ # Summary text (post flag, se, alpha, sample size) built during estimation.
+ head_msg
function rlassologitEffect(X, Y, D; I3::Any = nothing, post = true)
+ colnames = try
+ names(X)
+ catch
+ []
+ end
x = Matrix(X[:, :])
d = D[:]
y = Y[:]
@@ -109,16 +204,16 @@ function rlassologitEffect(X, Y, D; I3::Any = nothing, post = true)
dx = hcat(d, x)
l1 = rlassologit(dx, y, post = post, intercept = true, lambda = la1)
- x1 = l1["residuals"]
+ x1 = l1.result["residuals"]
- t = hcat(ones(n), dx) * l1["coefficients"]
+ t = hcat(ones(n), dx) * l1.result["coefficients"]
sigma2 = exp.(t) ./ (1 .+ exp.(t)).^2
w = copy(sigma2)
f = sqrt.(sigma2)
- I1 = l1["ind1ex"][Not(1)]
+ I1 = l1.result["index"][Not(1)]
lambda = 2.2 * sqrt(n) * quantile(Normal(0, 1), 1 - 0.05 / max(n, p * log(n)))
la2 = repeat([lambda], p)
@@ -127,7 +222,7 @@ function rlassologitEffect(X, Y, D; I3::Any = nothing, post = true)
l2 = rlasso(xf, df, post = post, intercept = true, homoskedastic = false, lambda_start = la2, c = 1.1, gamma = 0.1)
# return l2
# include("hdmjl.jl")
- I2 = l2["ind1ex"]
+ I2 = l2["index"]
z = l2["residuals"] ./ sqrt.(sigma2)
if isnothing(I3)
@@ -140,17 +235,18 @@ function rlassologitEffect(X, Y, D; I3::Any = nothing, post = true)
ind1 = []
- for i in eachind1ex(I)
+ for i in eachindex(I)
if I[i] > 0
append!(ind1, i)
- ind1
xselect = x[:, ind1]
p3 = size(xselect, 2)
- data3 = DataFrame(hcat(y, d, xselect))
+ data3 = DataFrame(hcat(y, d, xselect), :auto)
rename!(data3, "x1" => "y")
@@ -171,7 +267,22 @@ function rlassologitEffect(X, Y, D; I3::Any = nothing, post = true)
no_select = 0
+ ##-----
+ main_mssg =
+ "
+ ---
+ post = $post
+ ----
+ se = $se
+ alpha = $alpha
+ sample_size = $n
+ ---
+ "
+ # main_table = hcat(coe)
# GLM.residuals(l3)
+ print(main_mssg)
res = Dict("epsilon" => y - g3, "v" => z)
results = Dict(
@@ -179,18 +290,34 @@ function rlassologitEffect(X, Y, D; I3::Any = nothing, post = true)
"no_select" => no_select, "coefficients" => alpha, "coefficient" => alpha,
"residuals" => res, "sample_size" => n, "post" => post
- return results
+ return rlassologitEffect(results, main_mssg)
+# Print the summary text stored in an `rlassologitEffect` result.
+function r_summary(result::rlassologitEffect)
+ print(result.head_msg)
+# Container with a result, a header message and a table.
+# NOTE(review): the estimation function of the same name currently ends with
+# `return rlassoEffects(res, head_mssg, main_table)`; presumably it was meant to
+# build this struct instead — confirm.
+mutable struct rlassologitEffects
+ result
+ head_mssg
+ table
-function rlassologitEffects(x, y; ind1ex = 1:3, I3 = nothing, post = true)
- x = Matrix(x)
+function rlassologitEffects(x, y; index = 1:size(x, 2), I3 = nothing, post = true)
+ names_col = try
+ names(x)
+ catch
+ nothing
+ end
+ x = Matrix(x[:, :])
y = Matrix(y[:, :])
n, p = size(x)
- if Set(ind1ex) == 2
- k = p1 = sum(ind1ex)
+ if Set(index) == 2
+ k = p1 = sum(index)
- k = p1 = length(ind1ex)
+ k = p1 = length(index)
coefficients = zeros(k)
@@ -201,34 +328,63 @@ function rlassologitEffects(x, y; ind1ex = 1:3, I3 = nothing, post = true)
# print(k)
for i in 1:k
- d = x[:, ind1ex[i]]
- xt = x[:, Not(ind1ex[i])]
+ d = x[:, index[i]]
+ xt = x[:, Not(index[i])]
if isnothing(I3)
I3m = I3
- I3m = I3[Not(ind1ex[i])]
+ I3m = I3[Not(index[i])]
lasso_regs[i] = try
- rlassologitEffect(xt, y, d, I3 = I3m, post = post)
+ rlassologitEffect(xt, y, d, I3 = I3m, post = post);
if lasso_regs[i] == "try-error"
- coefficients[i] = lasso_regs[i]["alpha"]
- se[i] = lasso_regs[i]["se"]
- t[i] = lasso_regs[i]["t"]
- reside["epsilon"][i] = lasso_regs[i]["residuals"]["epsilon"]
- reside["v"][i] = lasso_regs[i]["residuals"]["v"]
+ # print(lasso_regs[i])
+ coefficients[i] = lasso_regs[i].result["alpha"]
+ se[i] = lasso_regs[i].result["se"]
+ t[i] = lasso_regs[i].result["t"]
+ reside["epsilon"][i] = lasso_regs[i].result["residuals"]["epsilon"]
+ reside["v"][i] = lasso_regs[i].result["residuals"]["v"]
+ head_mssg = "
+ ---
+ post: $post
+ index: $(collect(index)')
+ ---
+ "
+ n_cl = []
+ if isnothing(names_col)
+ for i in 1:p
+ push!(n_cl, "V $i")
+ end
+ else
+ n_cl = names_col
+ end
+ main_table = hcat(n_cl[index], coefficients)
+ header = ["Variable", "Estimate"]
+ print(head_mssg)
+ print("\n")
+ @ptconf tf = tf_simple alignment = :l
+ @pt :header = header main_table
# residual =
res = Dict(
"coefficients" => coefficients, "se" => se, "t" => t,
- "lasso_regs" => lasso_regs, "ind1ex" => ind1ex, "sample_size" => n,
+ "lasso_regs" => lasso_regs, "index" => index, "sample_size" => n,
"residuals" => reside
- return res
\ No newline at end of file
+ return rlassoEffects(res, head_mssg, main_table)
diff --git a/src/rlassotreatment.jl b/src/rlassotreatment.jl
index 8e89ff4..44cc8c2 100644
--- a/src/rlassotreatment.jl
+++ b/src/rlassotreatment.jl
@@ -1,3 +1,10 @@
+# Result container shared by the treatment-effect estimators in this file.
+mutable struct rlassoTE
+ # Dictionary of estimation outputs; `r_summary` reads "te", "se" and "type_boot".
+ result::Dict
+ # main_tbl
+ # Label of the estimated effect (e.g. "ATE", "LATET", "ATET").
+ type
function rlassoLATE(x, d, y, z; bootstrap = "none", n_rep = 100, always_takers = true,
post = true, intercept = true, never_takers = true)
@@ -102,7 +109,7 @@ function rlassoLATE(x, d, y, z; bootstrap = "none", n_rep = 100, always_takers =
b_z_xl = rlassologit(x1, z, post = post, intercept = intercept)
- yp_b = get_mtrx(x1) * b_z_xl["coefficients"]
+ yp_b = get_mtrx(x1) * b_z_xl.result["coefficients"]
mz_x = 1 ./ (1 .+ exp.(-1 .* (yp_b)))
mz_x = mz_x .* ((mz_x .> 1e-12) .& (mz_x .< (1 - 1e-12))) .+ (1 - 1e-12) .* (
@@ -145,16 +152,19 @@ function rlassoLATE(x, d, y, z; bootstrap = "none", n_rep = 100, always_takers =
object["type_boot"] = bootstrap
object["boot_n"] = n_rep
- object
+ object["type_boot"] = bootstrap
- return object
+ return rlassoTE(object, object["type"])
function rlassoATE(x, d, y; bootstrap = "none", n_rep = 500)
z = copy(d)
res = rlassoLATE(x, d, y, z, bootstrap = bootstrap, n_rep = n_rep)
- res["type"] = "ATE"
- return res
+ type = "ATE"
+ # res.result["type"] = type
+ res.result["type_boot"] = bootstrap
+ return rlassoTE(res.result, type)
function rlassoLATET(x, d, y, z; bootstrap::String = "none", n_rep::Int64 = 500, post::Bool = true, always_takers::Bool = true, never_takers::Bool = true, intercept::Bool = true)
@@ -190,7 +200,7 @@ function rlassoLATET(x, d, y, z; bootstrap::String = "none", n_rep::Int64 = 500,
b_z_xl = rlassologit(x, z, post = post, intercept = intercept, c = 1.1, gamma = 0.1, lambda = lambda)
if intercept
- mz_x = hcat(ones(n), x) * b_z_xl["coefficients"]
+ mz_x = hcat(ones(n), x) * b_z_xl.result["coefficients"]
elseif !intercept
mz_x = x * b_z_xl["coefficients"]
@@ -224,16 +234,44 @@ function rlassoLATET(x, d, y, z; bootstrap::String = "none", n_rep::Int64 = 500,
boot[i] = mean(weights .* ((y - my_z0x) - (ones(n) - z) .* (y - my_z0x) ./ (ones(n) - mz_x))) / mean(weights .* ((d - md_z0x) - (ones(n) - z) .* (d - md_z0x) ./ (ones(n) - mz_x)))
res["boot_se"] = sqrt(var(boot))
- res["type_boot"] = bootstrap
+ res["type_boot"] = bootstrap
return res
- return res
+ res["type_boot"] = bootstrap
+ type = "LATET"
+ return rlassoTE(res, type)
function rlassoATET(x, d, y, bootstrap::String = "none", n_rep::Int64 = 500)
z = copy(d)
res = rlassoLATET(x, d, y, z, bootstrap = bootstrap, n_rep = n_rep)
- res["type"] = "ATET"
- return res
+ type = "ATET"
+ return rlassoTE(res.result, type)
+function r_summary(rlasso_TE::rlassoTE)
+    # Print a short header (effect type and bootstrap setting) followed by a
+    # one-row table with the treatment-effect estimate, its standard error and
+    # the t-value (estimate / standard error). Returns that 1x3 table.
+    #
+    # Reads "te", "se" and "type_boot" from `rlasso_TE.result`.
+    # Fixes the "tesing" typo in the printed header.
+    print("
+ Estimation and significance testing of the treatment effect
+ Type: $(rlasso_TE.type)
+ Bootstrap: $(rlasso_TE.result["type_boot"])
+ ")
+    print("\n")
+    results = rlasso_TE.result
+    coef = results["te"]
+    se = results["se"]
+    t_value = coef / se
+    header = ["Coeff", "SE", "t.value"]
+    tbl = [coef se t_value]
+    @ptconf tf = tf_simple
+    @pt :header = header tbl
+    return tbl
\ No newline at end of file
diff --git a/src/tsls.jl b/src/tsls.jl
index ef95497..0f02174 100644
--- a/src/tsls.jl
+++ b/src/tsls.jl
@@ -9,11 +9,36 @@ end
function tsls(d::Array, y::Array, z::Array, x::Union{Nothing, Array} = nothing; intercept::Bool = true, homoscedastic::Bool = true) # x::Union{Nothing, DataFrame, Array}
n = size(y, 1)
+ # if intercept == true
+ # d_names = ["d$y" for y = 1:size(d[:,:],2)]
+ # x_names = ["x$y" for y = 1:size(x,2)];
+ # coef_names = append!(append!(d_names, ["intercept"]), x_names);
+ # else
+ # d_names = ["d$y" for y = 1:size(d[:,:],2)]
+ # x_names = ["x$y" for y = 1:size(x,2)];
+ # coef_names = append!(d_names,x_names);
+ # end
- if intercept && !isnothing(x)
+ if intercept == true && isnothing(x) == false
+ d_names = ["d$y" for y = 1:size(d[:,:],2)]
+ x_names = ["x$y" for y = 1:size(x,2)];
+ coef_names = append!(append!(d_names, ["intercept"]), x_names);
x = hcat(ones(n, 1), x)
- elseif intercept && isnothing(x)
+ elseif intercept == true && isnothing(x) == true
x = ones(n, 1)
+ d_names = ["d$y" for y = 1:size(d[:,:],2)]
+ #x_names = ["x$y" for y = 1:size(x,2)];
+ coef_names = append!(d_names, ["intercept"]);
+ elseif intercept == false && isnothing(x) == true
+ d_names = ["d$y" for y = 1:size(d[:,:],2)]
+ #x_names = ["x$y" for y = 1:size(x,2)];
+ coef_names = d_names;
+ elseif intercept == false && isnothing(x) == false
+ d_names = ["d$y" for y = 1:size(d[:,:],2)]
+ x_names = ["x$y" for y = 1:size(x,2)];
+ coef_names = append!(d_names, x_names)
a1 = size(d, 2)
@@ -58,6 +83,7 @@ function tsls(d::Array, y::Array, z::Array, x::Union{Nothing, Array} = nothing;
se = sqrt(VC1)
+ b = hcat(coef_names, b)
res = Dict("coefficients" => b, "vcov" => VC1, "se" => se, "residuals" => e, "sample_size" => n)
return res
\ No newline at end of file
diff --git a/test/rlogit.jl b/test/rlogit.jl
new file mode 100644
index 0000000..65624d4
--- /dev/null
+++ b/test/rlogit.jl
@@ -0,0 +1,91 @@
+# Scratch script that runs `rlassologit` on simulated data.
+# Activate the project in the current directory. The Pkg-REPL form `] activate .`
+# only works at the interactive prompt and does not parse inside a script.
+using Pkg; Pkg.activate(".")
+using HDMjl, Random, Distributions
+n = 250
+p = 40
+px = 10
+X = randn((n, p))
+beta = vcat(repeat([2.], px), zeros(p - px))
+intercept = 1
+P = exp.(intercept .+ X * beta) ./ (1 .+ exp.(intercept .+ X * beta))
+# y = Int64[]
+# NOTE(review): `X`, `beta` and `P` above are not used by the fit below, which
+# draws a fresh `x` and an independent `y` — confirm this is intended.
+x = rand(Normal(), (n, p))
+y = rand(Binomial(), n)
+logit_out = rlassologit(x, y, intercept = true)
+# Scratch helper: prints "z" and returns whatever `print` returned.
+function suma(x)
+ z1 = print("z")
+ # `n` is built here but not read afterwards.
+ n = Dict("x" => x, "z" => z1)
+ z1
+ return z1;
+# NOTE(review): the parameter is `X` but the body reads lowercase `x`, and the
+# function is declared under the qualified name `suma.actio` — confirm what this
+# experiment was meant to define.
+function suma.actio(X)
+ return x + 1
+# Two-argument method of `suma`: returns the sum of its arguments.
+suma(x, y) = x + y
+# Scratch experiment: struct with two integer fields and a function-valued field.
+# NOTE(review): `fsit` is given a default value, but this struct is not declared
+# with `Base.@kwdef` (unlike `Model1` further down) — verify that this parses.
+mutable struct lasologit
+ x::Int
+ y::Int
+ fsit::Function = x -> x + y
+lasologit(12, 3)
+# Scratch experiment: struct that stores a closure (`the_bits`) bound to its own
+# instance. The inner constructor fills the first four fields through `new` and
+# assigns `the_bits` afterwards.
+mutable struct BitNumber
+ val::Int
+ bit_end::UInt
+ bit_start::UInt
+ width::UInt
+ the_bits::Function
+ BitNumber(v,e,s,w,bits_func) = begin
+ # NOTE(review): `v=v<>a` is not a valid expression as written; presumably a
+ # shift operator was lost — confirm against the original source.
+ ret = new( (a=63-e+s; v=v<>a) ,e,s,w)
+ ret.the_bits = (bit_end,bit_start) -> bits_func(ret,bit_end,bit_start)
+ ret
+ end
+# Scratch experiment: struct with two Int64 fields and a `suma(x, y)` definition
+# placed inside the struct body.
+# NOTE(review): confirm whether defining `suma` here (rather than a constructor
+# named `sumas`) is intended.
+mutable struct sumas
+ x::Int64
+ y::Int64
+ # add_new::Function
+ suma(x, y) = x + y
+# Scratch experiment: keyword-constructible struct (`Base.@kwdef`) with default
+# values for every field, including a function-valued field.
+Base.@kwdef struct Model1
+ p::Float64 = 2.0
+ n::Int64 = 4
+ # f::Function
+ # Default `f` takes its own two arguments and returns their sum plus one.
+ f::Function = (p, n) -> begin
+ n + p + 1
+ end
+ # function f(x, y)
+ # x + y
+ # end
+typeof(Model1().f(12, 1))
+# Scratch experiment: struct with default field values and an inner `si1(x, y)`
+# definition that returns `x + y` instead of calling `new`.
+# NOTE(review): the defaults are written without `Base.@kwdef` — verify that
+# this parses.
+mutable struct si1
+ x::Real = 12
+ y::Real = 8
+ si1(x, y) = x + y
\ No newline at end of file
diff --git a/tutorial/HDMjl_tutorial.ipynb b/tutorial/HDMjl_tutorial.ipynb
index 690831e..97a73b4 100644
--- a/tutorial/HDMjl_tutorial.ipynb
+++ b/tutorial/HDMjl_tutorial.ipynb
@@ -16,36 +16,114 @@
"cell_type": "code",
- "execution_count": 56,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "import Pkg; Pkg.add(\"HDMjl\")"
+ "import Pkg; using Pkg"
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 9,
"metadata": {},
+ "outputs": [],
"source": [
- "or"
+ "#Pkg.rm(\"HDMjl\")"
"cell_type": "code",
- "execution_count": 57,
+ "execution_count": 12,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m git-repo `https://github.com/d2cml-ai/HDMjl.jl`\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1m Resolving\u001b[22m\u001b[39m package versions..."
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m `C:\\Users\\User\\.julia\\environments\\v1.8\\Project.toml`\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " \u001b[90m [8de29b41] \u001b[39m\u001b[92m+ HDMjl v0.0.11 `https://github.com/d2cml-ai/HDMjl.jl#prueba`\u001b[39m\n",
+ "\u001b[32m\u001b[1m Updating\u001b[22m\u001b[39m "
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "`C:\\Users\\User\\.julia\\environments\\v1.8\\Manifest.toml`\n",
+ " \u001b[90m [8de29b41] \u001b[39m\u001b[92m+ HDMjl v0.0.11 `https://github.com/d2cml-ai/HDMjl.jl#prueba`\u001b[39m"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[32m\u001b[1mPrecompiling\u001b[22m\u001b[39m"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " project..."
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\u001b[33m ✓ \u001b[39mHDMjl\n",
+ " 1 dependency successfully precompiled in 28 seconds. 292 already precompiled.\n",
+ " \u001b[33m1\u001b[39m dependency precompiled but a different version is currently loaded. Restart julia to access the new version\n"
+ ]
+ }
+ ],
"source": [
- "import Pkg; Pkg.add(url = \"https://github.com/d2cml-ai/HDMjl.jl\")"
+ "Pkg.add(url = \"https://github.com/d2cml-ai/HDMjl.jl\", rev = \"prueba\")"
"cell_type": "code",
- "execution_count": 102,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
- "using HDMjl, CodecXz, GLM\n"
+ "using CodecXz, RData, DataFrames, StatsModels, Statistics, Distributions, PrettyTables, GLM, CSV, HDMjl, LinearAlgebra, StatsModels"
@@ -62,168 +140,279 @@
"### 3.2. A Joint Significance Test for Lasso Regression."
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Example. (Prediction Using Lasso and Post-Lasso) Consider generated data from a sparse linear model:"
+ ]
+ },
"cell_type": "code",
- "execution_count": 95,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
- "using Random\n",
- "Random.seed!(1234);\n",
- "n = 100;\n",
- "p = 100;\n",
- "s = 3;\n",
- "X = randn(n, p);\n",
- "beta = vcat(fill(5, s), zeros(p - s));\n",
- "Y = X * beta + randn(n);"
+ "url = \"https://raw.githubusercontent.com/d2cml-ai/HDMjl.jl/prueba/data/3_2.csv\"\n",
+ "dta = DataFrame(CSV.read(download(url), DataFrame));\n",
+ "n, p = size(dta);\n",
+ "Y = dta[:,1];\n",
+ "X = Matrix(dta[:,2:end]);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next we estimate the model, print the results, and make in-sample and out-of-sample predictions. We can use methods print and summarize to print the results, where the option all can be set to FALSE to limit the print only to the non-zero coefficients.\n"
"cell_type": "code",
- "execution_count": 103,
+ "execution_count": 4,
"metadata": {},
"outputs": [
- "data": {
- "text/plain": [
- "Dict{String, Any} with 15 entries:\n",
- " \"tss\" => 8466.4\n",
- " \"dev\" => [8.56479, -6.50463, -1.51601, 8.77722, -5.1338, -7.07815, 7…\n",
- " \"model\" => [0.970656 0.262456 … 1.86802 -0.460151; -0.979218 -0.022244…\n",
- " \"loadings\" => [1.99904, 1.58734, 1.67152, 1.86208, 1.859, 1.8311, 1.45692…\n",
- " \"sigma\" => 1.56664\n",
- " \"lambda0\" => 81.3601\n",
- " \"lambda\" => [162.642, 129.146, 135.995, 151.499, 151.248, 148.978, 118.…\n",
- " \"intercept\" => -0.196115\n",
- " \"iter\" => 16\n",
- " \"residuals\" => [0.119342, -0.25299, -1.17148, 0.282269, -0.643579, -0.5525…\n",
- " \"rss\" => 242.981\n",
- " \"index\" => Bool[1, 1, 1, 0, 0, 0, 0, 0, 0, 0 … 0, 0, 0, 0, 0, 0, 0, …\n",
- " \"beta\" => [4.31658, 4.39195, 4.45657, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0…\n",
- " \"options\" => Dict{String, Any}(\"intercept\"=>true, \"post\"=>false, \"meanx\"…\n",
- " \"coefficients\" => [-0.196115, 4.31658, 4.39195, 4.45657, 0.0, 0.0, 0.0, 0.0, …"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " Post-Lasso Estimation: false\n",
+ " Total number of variables: 100\n",
+ " Number of selected variables: 11\n",
+ " ---\n",
+ " \n",
+ "============ ==============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ==============\n",
+ " Intercept 0.056855\n",
+ " V 1 4.77121\n",
+ " V 2 4.69284\n",
+ " V 3 4.76568\n",
+ " V 13 -0.0453685\n",
+ " V 15 -0.0467382\n",
+ " V 16 -0.00499617\n",
+ " V 19 -0.0922336\n",
+ " V 22 -0.0272553\n",
+ " V 40 -0.0105032\n",
+ " V 61 0.113585\n",
+ " V 100 -0.0247296\n",
+ "============ ==============\n",
+ "\n",
+ " ----\n",
+ " Multiple R-squared: 0.9912720815874809\n",
+ " Adjusted R-squared: 0.9901810917859161\n",
+ " "
+ ]
"source": [
- "rlasso(X, Y, post = false)"
+ "lasso_reg = rlasso(X, Y, post = false);\n",
+ "sum_lasso = r_summary(lasso_reg)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = \"https://raw.githubusercontent.com/d2cml-ai/HDMjl.jl/prueba/data/3_2_2.csv\"\n",
+ "dta = DataFrame(CSV.read(download(url), DataFrame));\n",
+ "n, p = size(dta);\n",
+ "yhat_lasso = r_predict(lasso_reg) #in-sample prediction\n",
+ "Ynew = dta[:,1];\n",
+ "Xnew = Matrix(dta[:,2:end]);\n",
+ "yhat_lasso_new = r_predict(lasso_reg, xnew = Xnew) #out-of-sample prediction\n",
+ ";"
"cell_type": "code",
- "execution_count": 89,
+ "execution_count": 6,
"metadata": {},
"outputs": [
- "data": {
- "text/plain": [
- "1×101 adjoint(::Vector{Float64}) with eltype Float64:\n",
- " -0.00682754 5.00958 4.93178 5.17705 … 0.0 0.0 0.0 0.0 0.0 0.0 0.0"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " Post-Lasso Estimation: true\n",
+ " Total number of variables: 100\n",
+ " Number of selected variables: 3\n",
+ " ---\n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept 0.0341043\n",
+ " V 1 4.92413\n",
+ " V 2 4.85787\n",
+ " V 3 4.96442\n",
+ "============ ============\n",
+ "\n",
+ " ----\n",
+ " Multiple R-squared: 0.9906284190077158\n",
+ " Adjusted R-squared: 0.990335557101707\n",
+ " "
+ ]
"source": [
"post_lasso_reg = rlasso(X, Y, post = true) #now use post-lasso\n",
- "post_lasso_reg[\"coefficients\"]'"
+ "r_summary(post_lasso_reg)"
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 7,
"metadata": {},
+ "outputs": [],
"source": [
- "## 4. Inference on Target Regression Coefficients"
+ "yhat_postlasso = r_predict(post_lasso_reg) #in-sample prediction\n",
+ "yhat_postlasso_new = r_predict(post_lasso_reg, xnew = Xnew) #out-of-sample prediction\n",
+ ";"
- "cell_type": "markdown",
+ "cell_type": "code",
+ "execution_count": 8,
"metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "============ =================\n",
+ " \u001b[1m lasso MAE \u001b[0m \u001b[1m Post-lasso MAE \u001b[0m\n",
+ "============ =================\n",
+ " 0.879583 0.78017\n",
+ "============ =================\n"
+ ]
+ }
+ ],
"source": [
- "### 4.1. Intuition for the Orthogonality Principle in Linear Models via Partialling Out."
+ "MAE = mean(eachrow(hcat(abs.(Ynew - yhat_lasso_new), abs.(Ynew - yhat_postlasso_new))))\n",
+ "MAE = DataFrame([[MAE[1]], [MAE[2]]], :auto)\n",
+ "MAE = rename!(MAE, [\"lasso MAE\", \"Post-lasso MAE\"])\n",
+ "pretty_table(MAE, tf = tf_simple, nosubheader = true)"
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
- "source": []
+ "source": [
+ "## 4. Inference on Target Regression Coefficients"
+ ]
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 4.2. Inference: Confidence Intervals and Significance Testing. The function rlassoEffects"
+ "### 4.1. Intuition for the Orthogonality Principle in Linear Models via Partialling Out."
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "url = \"https://raw.githubusercontent.com/d2cml-ai/HDMjl.jl/prueba/data/4_1.csv\"\n",
+ "dta = DataFrame(CSV.read(download(url), DataFrame));\n",
+ "n, p = size(dta);\n",
+ "y = dta[:,\"y\"];\n",
+ "d = dta[:,\"d\"];\n",
+ "x = Matrix(dta[:,3:end]);"
+ ]
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 4.3. Application: the effect of gender on wage"
+ "We can estimate $\\alpha_0$ by running full least squares:"
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "1 rows × 2 columns
| Estimate | Std_Error |
| Float64 | Float64 |
1 | 0.978075 | 0.0137122 |
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& Estimate & Std\\_Error\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 0.978075 & 0.0137122 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "\u001b[1m1×2 DataFrame\u001b[0m\n",
+ "\u001b[1m Row \u001b[0m│\u001b[1m Estimate \u001b[0m\u001b[1m Std_Error \u001b[0m\n",
+ "\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n",
+ "─────┼─────────────────────\n",
+ " 1 │ 0.978075 0.0137122"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "full_fit = lm(hcat(ones(length(y)), Matrix(dta[:,2:end])), y);\n",
+ "DataFrame(Estimate = coef(full_fit)[2], Std_Error = stderror(full_fit)[2])"
+ ]
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 4.4. Application: Estimation of the treatment effect in a linear model with many confounding factors"
+ "Another way to estimate $\\alpha_0$ is to first partial out the x-variables from $y_i$ and $d_i$, and run least squares on the residuals:"
"cell_type": "code",
- "execution_count": 92,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
- "using CodecXz\n",
- "using RData\n",
- "using DataFrames\n",
- "url = \"https://github.com/cran/hdm/raw/master/data/GrowthData.rda\";\n",
- "GrowthData = load(download(url))[\"GrowthData\"];\n",
- "y = GrowthData[:, 1];\n",
- "d = GrowthData[:, 3];\n",
- "X = Matrix(GrowthData[:, Not(1, 2, 3)]);"
+ "rY_1 = lm(hcat(ones(length(y)), Matrix(dta[:,3:end])), y);\n",
+ "rY = y - predict(rY_1)\n",
+ "rD_1 = lm(hcat(ones(length(y)), Matrix(dta[:,3:end])), d);\n",
+ "rD = d - predict(rD_1);"
"cell_type": "code",
- "execution_count": 93,
+ "execution_count": 17,
"metadata": {},
"outputs": [
"data": {
+ "text/html": [
+ "1 rows × 2 columns
| Estimate | Std_Error |
| Float64 | Float64 |
1 | 0.978075 | 0.0136862 |
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& Estimate & Std\\_Error\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 0.978075 & 0.0136862 \\\\\n",
+ "\\end{tabular}\n"
+ ],
"text/plain": [
- "Dict{String, Any} with 10 entries:\n",
- " \"alpha\" => -0.0453558\n",
- " \"t\" => -2.43116\n",
- " \"se\" => 0.018656\n",
- " \"no_select\" => 0\n",
- " \"coefficients_reg\" => [0.2247, -0.0453558, -0.064512, 0.215358, -0.0960046, -…\n",
- " \"sample_size\" => 90\n",
- " \"coefficient\" => -0.0453558\n",
- " \"selection_index\" => Bool[1, 1, 1, 1, 1, 1, 1, 1, 0, 0 … 0, 0, 0, 0, 0, 0,…\n",
- " \"residuals\" => Dict(\"v\"=>[0.210235, 0.201381, 0.00253076, -0.0671351, …\n",
- " \"coefficients\" => -0.0453558"
+ "\u001b[1m1×2 DataFrame\u001b[0m\n",
+ "\u001b[1m Row \u001b[0m│\u001b[1m Estimate \u001b[0m\u001b[1m Std_Error \u001b[0m\n",
+ "\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n",
+ "─────┼─────────────────────\n",
+ " 1 │ 0.978075 0.0136862"
"metadata": {},
@@ -231,27 +420,43 @@
"source": [
- "rlassoEffect(X, y, d, method = \"double selection\")"
+ "partial_fit_ls = lm(hcat(ones(length(y)), rD), rY)\n",
+ "DataFrame(Estimate = coef(partial_fit_ls)[2], Std_Error = stderror(partial_fit_ls)[2])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "In high-dimensional settings, we can no longer rely on the full least-squares and instead may rely on\n",
+ "Lasso or Post-Lasso for partialling out:"
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 18,
"metadata": {},
"outputs": [
"data": {
+ "text/html": [
+ "1 rows × 2 columns
| Estimate | Std_Error |
| Float64 | Float64 |
1 | 0.972739 | 0.0136868 |
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& Estimate & Std\\_Error\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 0.972739 & 0.0136868 \\\\\n",
+ "\\end{tabular}\n"
+ ],
"text/plain": [
- "Dict{String, Any} with 9 entries:\n",
- " \"alpha\" => -0.05333\n",
- " \"t\" => -3.722\n",
- " \"se\" => 0.0143283\n",
- " \"coefficients_reg\" => [-0.0845872, -0.0461165, 0.18946, -0.0299086, 0.0, 0.0,…\n",
- " \"sample_size\" => 90\n",
- " \"coefficient\" => -0.05333\n",
- " \"selection_index\" => Any[true, true, true, true, true, true, true, true, fal…\n",
- " \"residuals\" => Dict(\"v\"=>[0.181221, 0.116676, 0.0971757, -0.133713, 0.…\n",
- " \"coefficients\" => -0.05333"
+ "\u001b[1m1×2 DataFrame\u001b[0m\n",
+ "\u001b[1m Row \u001b[0m│\u001b[1m Estimate \u001b[0m\u001b[1m Std_Error \u001b[0m\n",
+ "\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n",
+ "─────┼─────────────────────\n",
+ " 1 │ 0.972739 0.0136868"
"metadata": {},
@@ -259,115 +464,229 @@
"source": [
- "rlassoEffect(X, y, d, method = \"partialling out\")"
+ "rY_1 = rlasso(hcat(ones(length(y)), Matrix(dta[:,3:end])), y);\n",
+ "rY = rY_1[\"residuals\"]\n",
+ "rD_1 = rlasso(hcat(ones(length(y)), Matrix(dta[:,3:end])), d);\n",
+ "rD = rD_1[\"residuals\"]\n",
+ "partial_fit_postlasso = lm(hcat(ones(length(y)), rD), vec(rY))\n",
+ "DataFrame(Estimate = coef(partial_fit_postlasso)[2], Std_Error = stderror(partial_fit_postlasso)[2])"
"cell_type": "markdown",
"metadata": {},
"source": [
- "## 5. Instrumental Variable Estimation in a High-Dimensional Setting"
+ "The orthogonal estimating equations method – based on partialling out via Lasso or post-Lasso – is\n",
+ "implemented by the function rlassoEffect, using method= \"partialling out\":"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and significance testing of the effect of target variables\n",
+ " \u001b[1m Row \u001b[0m \u001b[1m Estimate. \u001b[0m \u001b[1m Std. Error \u001b[0m \u001b[1m t value \u001b[0m \u001b[1m Pr(>|t|) \u001b[0m\n",
+ "\n",
+ " 1 0.97274 0.01369 71.05478 0.0 ***\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "Eff = rlassoEffect(x, y, d, method = \"partialling out\");\n",
+ "r_summary(Eff);"
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 5.2. Application: Economic Development and Institutions."
+ "Another orthogonal estimating equations method – based on the double selection of covariates – is implemented by the function rlassoEffect, using method= \"double selection\":"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and significance testing of the effect of target variables\n",
+ " \u001b[1m Row \u001b[0m \u001b[1m Estimate. \u001b[0m \u001b[1m Std. Error \u001b[0m \u001b[1m t value \u001b[0m \u001b[1m Pr(>|t|) \u001b[0m\n",
+ "\n",
+ " 1 0.97807 0.01416 69.07274 0.0 ***\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "Eff = rlassoEffect(x, y, d, method = \"double selection\");\n",
+ "r_summary(Eff);"
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 5.3. Application: Impact of Eminent Domain Decisions on Economic Outcomes."
+ "### 4.2. Inference: Confidence Intervals and Significance Testing. The function rlassoEffects"
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = \"https://raw.githubusercontent.com/d2cml-ai/HDMjl.jl/prueba/data/4_2.csv\"\n",
+ "data = DataFrame(CSV.read(download(url), DataFrame));\n",
+ "n, p = size(data);\n",
+ "y = data[:,1];\n",
+ "#d = dta[:,\"d\"];\n",
+ "x = Matrix(data[:,2:end]);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can do inference on a set of variables of interest, e.g. the first, second, third, and the fiftieth:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lasso_effects = rlassoEffects(x, y, index = [1,2,3,50]);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
"metadata": {},
"outputs": [
- "data": {
- "text/plain": [
- "Dict{String, Any} with 5 entries:\n",
- " \"se\" => [0.0801865]\n",
- " \"sample_size\" => 312\n",
- " \"vcov\" => [0.00642988;;]\n",
- " \"residuals\" => [-0.111753; 0.0588269; … ; 0.218765; 0.301602;;]\n",
- " \"coefficients\" => [-0.0449578;;]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Coefficients:\n",
+ "\n",
+ " \u001b[1m X1 \u001b[0m \u001b[1m X2 \u001b[0m \u001b[1m X3 \u001b[0m \u001b[1m X50 \u001b[0m\n",
+ "\n",
+ " 2.9445 3.0413 2.9754 0.072\n"
+ ]
"source": [
- "using Statistics\n",
- "url = \"https://github.com/cran/hdm/raw/master/data/EminentDomain.rda\";\n",
- "EminentDomain = load(download(url))[\"EminentDomain\"];\n",
- "z = EminentDomain[\"logGDP\"][\"z\"];\n",
- "x = EminentDomain[\"logGDP\"][\"x\"];\n",
- "d = EminentDomain[\"logGDP\"][\"d\"];\n",
- "y = EminentDomain[\"logGDP\"][\"y\"];\n",
- "x = x[:, (mean(x, dims = 1) .> 0.05)'];\n",
- "z = z[:, (mean(z, dims = 1) .> 0.05)'];\n",
- "rlassoIV(x, d, y, z)"
+ "r_print(lasso_effects, digits = 4)"
"cell_type": "code",
- "execution_count": 47,
+ "execution_count": 110,
"metadata": {},
"outputs": [
- "data": {
- "text/plain": [
- "Dict{String, Any} with 5 entries:\n",
- " \"se\" => [0.0801865]\n",
- " \"sample_size\" => 312\n",
- " \"vcov\" => [0.00642988;;]\n",
- " \"residuals\" => [-0.111753; 0.0588269; … ; 0.218765; 0.301602;;]\n",
- " \"coefficients\" => [-0.0449578;;]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and significance testing of the effect of target variables\n",
+ " \u001b[1m \u001b[0m \u001b[1m Estimate. \u001b[0m \u001b[1m Std. Error \u001b[0m \u001b[1m t value \u001b[0m \u001b[1m Pr(>|t|) \u001b[0m\n",
+ "\n",
+ " \u001b[1m X1 \u001b[0m 2.94448 0.0881468 33.4043 1.1892e-244\n",
+ " \u001b[1m X2 \u001b[0m 3.04127 0.083891 36.2527 9.0141e-288\n",
+ " \u001b[1m X3 \u001b[0m 2.9754 0.0780394 38.127 4.58085e-318\n",
+ " \u001b[1m X50 \u001b[0m 0.0719553 0.0776455 0.926716 0.354074\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
"source": [
- "rlassoIV(x, d, y, z)"
+ "r_summary(lasso_effects);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \u001b[1m \u001b[0m \u001b[1m 2.5% \u001b[0m \u001b[1m 97.5% \u001b[0m\n",
+ "\n",
+ " \u001b[1m X1 \u001b[0m 2.77171 3.11724\n",
+ " \u001b[1m X2 \u001b[0m 2.87685 3.2057\n",
+ " \u001b[1m X3 \u001b[0m 2.82245 3.12836\n",
+ " \u001b[1m X50 \u001b[0m -0.0802271 0.224138\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_confint(lasso_effects);"
"cell_type": "markdown",
"metadata": {},
"source": [
- "## 6. Inference on Treatment Effects in a High-Dimensional Setting"
+ "We will also demonstrate the application of joint confidence intervals in an empirical application in\n",
+ "the next section."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \u001b[1m \u001b[0m \u001b[1m 2.5% \u001b[0m \u001b[1m 97.5% \u001b[0m\n",
+ "\n",
+ " \u001b[1m X1 \u001b[0m 2.7304 3.15855\n",
+ " \u001b[1m X2 \u001b[0m 2.83944 3.24311\n",
+ " \u001b[1m X3 \u001b[0m 2.7855 3.16531\n",
+ " \u001b[1m X50 \u001b[0m -0.113325 0.257236\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_confint(lasso_effects, joint = true);"
"cell_type": "markdown",
"metadata": {},
"source": [
- "### 6.3. Application: 401(k) plan participation."
+ "### 4.3. Application: the effect of gender on wage"
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 8,
"metadata": {},
"outputs": [
"data": {
"text/plain": [
- "Dict{String, Any} with 5 entries:\n",
- " \"se\" => 1930.68\n",
- " \"individual\" => [-30618.3, -57537.6, -71442.9, 21383.3, -2.32925e5, 3.40765e…\n",
- " \"sample_size\" => 9915\n",
- " \"te\" => 10180.1\n",
- " \"type\" => \"ATE\""
+ "(29217, 23)"
"metadata": {},
@@ -375,100 +694,1417 @@
"source": [
- "url = \"https://github.com/cran/hdm/raw/master/data/pension.rda\";\n",
- "pension = load(download(url))[\"pension\"];\n",
- "y = pension[:, \"tw\"];\n",
- "d = pension[:, \"p401\"];\n",
- "z = pension[:, \"e401\"];\n",
- "X = Matrix(pension[:, [\"i2\", \"i3\", \"i4\", \"i5\", \"i6\", \"i7\", \"a2\", \"a3\", \"a4\", \"a5\", \"fsize\", \"hs\", \"smcol\", \"col\", \"marr\", \"twoearn\", \"db\", \"pira\", \"hown\"]]);\n",
- "rlassoATE(X, d, y)"
+ "url = \"https://github.com/cran/hdm/raw/master/data/cps2012.rda\"\n",
+ "cps2012 = load(download(url))[\"cps2012\"];\n",
+ "n, p = size(cps2012);\n",
+ "size(cps2012)"
"cell_type": "code",
- "execution_count": 51,
+ "execution_count": 9,
"metadata": {},
"outputs": [
- "data": {
- "text/plain": [
- "Dict{String, Any} with 5 entries:\n",
- " \"se\" => 2944.43\n",
- " \"individual\" => [-21536.4, -52877.2, -1.44867e5, -2739.29, -307741.0, 7.3912…\n",
- " \"sample_size\" => 9915\n",
- " \"te\" => 12628.5\n",
- " \"type\" => \"ATET\""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(29217, 116)"
+ ]
"source": [
- "rlassoATET(X, d, y)"
+ "x_formula = @formula(lnw ~ -1 + female + female*widowed + female*divorced + female*separated + female*nevermarried +\n",
+ " female*hsd08 + female*hsd911 + female*hsg + female*cg + female*ad + female*mw + female*so + female*we + female*exp1 + female*exp2 + female*exp3)\n",
+ "x_dframe = ModelFrame( x_formula, cps2012)\n",
+ "x1 = ModelMatrix(x_dframe)\n",
+ "X = x1.m[:,Not(1:16)];\n",
+ "X = hcat(x1.m[:,1:16], X)\n",
+ "size(X)\n",
+ "fom_1 = [\"widowed\", \"divorced\", \"separated\", \"nevermarried\", \"hsd08\", \"hsd911\", \"hsg\", \"cg\", \"ad\", \"mw\", \"so\",\n",
+ " \"we\", \"exp1\", \"exp2\", \"exp3\"];\n",
+ "data = cps2012[:,fom_1];\n",
+ "sub_data = ones(size(data)[1])\n",
+ "for i in 1:size(data)[2]\n",
+ " if i <= (size(data)[2] -1)\n",
+ " sub_data = hcat(sub_data, Matrix(data[:, i] .* data[:, Not(1:i)]) )\n",
+ " end\n",
+ "end\n",
+ "sub_data = sub_data[:,2:end]\n",
+ "size(sub_data)\n",
+ "x = hcat(X, sub_data)\n",
+ "size(x)\n",
+ "filter = var.(eachcol(x)) .!= 0\n",
+ "x = x[:,filter]\n",
+ "print(size(x))\n",
+ "index_gender = [1,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31];\n",
+ "y = cps2012.lnw;\n",
+ ";"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The parameter estimates for the target parameters, i.e. all coefficients related to gender (i.e. by\n",
+ "interaction with other variables) are calculated and summarized by the following commands:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "effects_female = rlassoEffects(x, y, index = index_gender);"
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 12,
"metadata": {},
"outputs": [
- "data": {
- "text/plain": [
- "Dict{String, Any} with 5 entries:\n",
- " \"se\" => 2326.9\n",
- " \"individual\" => [-50526.8, -1.39158e5, -1.37102e5, 38508.0, -6.5644e5, 7.943…\n",
- " \"sample_size\" => 9915\n",
- " \"te\" => 12992.1\n",
- " \"type\" => \"LATE\""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and significance testing of the effect of target variables\n",
+ " \u001b[1m \u001b[0m \u001b[1m Estimate. \u001b[0m \u001b[1m Std. Error \u001b[0m \u001b[1m t value \u001b[0m \u001b[1m Pr(>|t|) \u001b[0m\n",
+ "\n",
+ " \u001b[1m X1 \u001b[0m -0.154923 0.0501624 -3.08843 0.00201216\n",
+ " \u001b[1m X17 \u001b[0m 0.136095 0.0906626 1.50112 0.133325\n",
+ " \u001b[1m X18 \u001b[0m 0.136939 0.0221817 6.17353 6.6782e-10\n",
+ " \u001b[1m X19 \u001b[0m 0.0233028 0.0532118 0.437925 0.661441\n",
+ " \u001b[1m X20 \u001b[0m 0.186853 0.0199424 9.36966 7.27651e-21\n",
+ " \u001b[1m X21 \u001b[0m 0.0278103 0.120914 0.23 0.818092\n",
+ " \u001b[1m X22 \u001b[0m -0.119335 0.0518797 -2.30023 0.0214354\n",
+ " \u001b[1m X23 \u001b[0m -0.0128898 0.0192232 -0.670533 0.502518\n",
+ " \u001b[1m X24 \u001b[0m 0.0101386 0.0183265 0.553218 0.580114\n",
+ " \u001b[1m X25 \u001b[0m -0.0304637 0.0218061 -1.39703 0.162405\n",
+ " \u001b[1m X26 \u001b[0m -0.00106344 0.0191918 -0.0554112 0.955811\n",
+ " \u001b[1m X27 \u001b[0m -0.00818334 0.0193568 -0.422763 0.672468\n",
+ " \u001b[1m X28 \u001b[0m -0.00422613 0.0211684 -0.199643 0.84176\n",
+ " \u001b[1m X29 \u001b[0m 0.00493526 0.00780428 0.632379 0.527139\n",
+ " \u001b[1m X30 \u001b[0m -0.159519 0.0452999 -3.52141 0.000429263\n",
+ " \u001b[1m X31 \u001b[0m 0.0384506 0.0078611 4.89125 1.00199e-6\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
"source": [
- "rlassoLATE(X, d, y, z)"
+ "r_summary(effects_female);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we estimate and plot confidence intervals, first “pointwise” and then the joint confidence intervals."
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 14,
"metadata": {},
"outputs": [
- "data": {
- "text/plain": [
- "Dict{String, Any} with 5 entries:\n",
- " \"se\" => 3645.28\n",
- " \"individual\" => [-35580.5, -90558.0, -1.83628e5, -5303.13, -8.0766e5, 1.8866…\n",
- " \"sample_size\" => 9915\n",
- " \"te\" => 15323.2\n",
- " \"type\" => \"LATET\""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \u001b[1m \u001b[0m \u001b[1m 2.5% \u001b[0m \u001b[1m 97.5% \u001b[0m\n",
+ "\n",
+ " \u001b[1m X1 \u001b[0m -0.295562 -0.0142847\n",
+ " \u001b[1m X17 \u001b[0m -0.136261 0.408452\n",
+ " \u001b[1m X18 \u001b[0m 0.0742004 0.199678\n",
+ " \u001b[1m X19 \u001b[0m -0.118061 0.164666\n",
+ " \u001b[1m X20 \u001b[0m 0.128705 0.245002\n",
+ " \u001b[1m X21 \u001b[0m -0.378365 0.433985\n",
+ " \u001b[1m X22 \u001b[0m -0.270462 0.0317919\n",
+ " \u001b[1m X23 \u001b[0m -0.0656411 0.0398615\n",
+ " \u001b[1m X24 \u001b[0m -0.0421792 0.0624564\n",
+ " \u001b[1m X25 \u001b[0m -0.0964645 0.0355371\n",
+ " \u001b[1m X26 \u001b[0m -0.055223 0.0530961\n",
+ " \u001b[1m X27 \u001b[0m -0.0632443 0.0468776\n",
+ " \u001b[1m X28 \u001b[0m -0.0662182 0.057766\n",
+ " \u001b[1m X29 \u001b[0m -0.0167011 0.0265717\n",
+ " \u001b[1m X30 \u001b[0m -0.285341 -0.0336976\n",
+ " \u001b[1m X31 \u001b[0m 0.0166404 0.0602608\n"
+ ]
"source": [
- "rlassoLATET(X, d, y, z)"
+ "joint_CI = r_confint(effects_female, 0.95, joint = true);\n",
+ "joint_CI;"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.4. Application: Estimation of the treatment effect in a linear model with many confounding factors"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, we load and prepare the data"
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "url = \"https://github.com/cran/hdm/raw/master/data/GrowthData.rda\";\n",
+ "GrowthData = load(download(url))[\"GrowthData\"];\n",
+ "y = GrowthData[:, 1];\n",
+ "d = GrowthData[:, 3];\n",
+ "X = Matrix(GrowthData[:, Not(1, 2, 3)]);\n",
+ "X_1 = Matrix(GrowthData[:, Not(1, 2)]);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we can estimate the effect of the initial GDP level. First, we estimate by OLS:"
+ ]
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "Q, R = qr(hcat(ones(length(y)), X_1))\n",
+ "β = pinv(hcat(ones(length(y)), X_1)) * y\n",
+ "\n",
+ "res = y - hcat(ones(length(y)), X_1) * β;\n",
+ "n = size(hcat(ones(length(y)), X_1))[1]\n",
+ "k = size(hcat(ones(length(y)), X_1))[2]\n",
+ "\n",
+ "sigma2_hat = (res' * res) / (n - k)\n",
+ "vcov_beta_hat = sigma2_hat .* inv(hcat(ones(length(y)), X_1)' * hcat(ones(length(y)), X_1));\n",
+ "se = sqrt.(diag(vcov_beta_hat))\n",
+ "\n",
+ "ls_effect = DataFrame(Estimate = β, stderror = se);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Second, we estimate the effect by partialling out via Post-Lasso:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and significance testing of the effect of target variables\n",
+ " \u001b[1m Row \u001b[0m \u001b[1m Estimate. \u001b[0m \u001b[1m Std. Error \u001b[0m \u001b[1m t value \u001b[0m \u001b[1m Pr(>|t|) \u001b[0m\n",
+ "\n",
+ " 1 -0.0498115 0.0139364 -3.57421 0.000351288\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "lasso_effect = rlassoEffect(X, y, d, method = \"partialling out\");\n",
+ "r_summary(lasso_effect);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Third, we estimate the effect by the double selection method:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and significance testing of the effect of target variables\n",
+ " \u001b[1m Row \u001b[0m \u001b[1m Estimate. \u001b[0m \u001b[1m Std. Error \u001b[0m \u001b[1m t value \u001b[0m \u001b[1m Pr(>|t|) \u001b[0m\n",
+ "\n",
+ " 1 -0.0500059 0.0157914 -3.16666 0.00154203\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "doublesel_effect = rlassoEffect(X, y, d, method = \"double selection\");\n",
+ "r_summary(doublesel_effect);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We then collect results in a nice latex table:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "=================================== ========== =============\n",
+ " \u001b[1m \u001b[0m \u001b[1m Estimate \u001b[0m \u001b[1m Std. Error \u001b[0m\n",
+ "=================================== ========== =============\n",
+ " full reg via ols -0.01 0.02989\n",
+ " partial reg\\nvia post-lasso -0.05 0.01394\n",
+ " partial reg via double selection -0.05 0.01579\n",
+ "=================================== ========== =============\n"
+ ]
+ }
+ ],
+ "source": [
+ "table = zeros(3,2)\n",
+ "table[1,:] = [round(Matrix(ls_effect)[2,1], digits = 2), round.(Matrix(ls_effect)[2,2], digits = 5)]\n",
+ "table[2,:] = [round(lasso_effect.coefficients, digits =2), round(lasso_effect.se, digits = 5)]\n",
+ "table[3,:] = [round(doublesel_effect.coefficients, digits =2), round(doublesel_effect.se, digits = 5)];\n",
+ "index = [\"full reg via ols\", \"partial reg\n",
+ "via post-lasso \", \"partial reg via double selection\"]\n",
+ "pretty_table(hcat(index, table), show_row_number = false, header = [\" \", \"Estimate\", \"Std. Error\"], tf = tf_simple, nosubheader = true)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5. Instrumental Variable Estimation in a High-Dimensional Setting"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.2. Application: Economic Development and Institutions."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, we process the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(64, 21)"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "url = \"https://github.com/cran/hdm/raw/master/data/AJR.rda\";\n",
+ "AJR = load(download(url))[\"AJR\"];\n",
+ "y = AJR[!,\"GDP\"]\n",
+ "d = AJR[!,\"Exprop\"]\n",
+ "z = AJR[!,\"logMort\"];\n",
+ "x_formula = @formula(GDP ~ -1 + Latitude + Latitude2 + Africa + Asia + Namer + Samer\n",
+ " + Latitude*Latitude2 + Latitude*Africa + Latitude*Asia + Latitude*Namer + Latitude*Samer\n",
+ " + Latitude2*Africa + Latitude2*Asia + Latitude2*Namer + Latitude2*Samer\n",
+ " + Africa*Asia + Africa*Namer + Africa*Samer\n",
+ " + Asia*Namer + Asia*Samer\n",
+ " + Namer*Samer)\n",
+ "x_dframe = ModelFrame( x_formula, AJR)\n",
+ "x1 = ModelMatrix(x_dframe)\n",
+ "x = x1.m\n",
+ "size(x)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Then we estimate an IV model with selection on the X"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "AJR_Xselect = rlassoIV(x, d, y, z, select_X=true, select_Z=false);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and Significance Testing of the effect of target variables in the IV regression model\n",
+ " \u001b[1m \u001b[0m \u001b[1m coeff. \u001b[0m \u001b[1m se. \u001b[0m \u001b[1m t-value \u001b[0m \u001b[1m p-value \u001b[0m\n",
+ "\n",
+ " d1 0.845027 0.269926 3.13059 0.00174458\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_summary(AJR_Xselect);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \u001b[1m \u001b[0m \u001b[1m 2.5% \u001b[0m \u001b[1m 97.5% \u001b[0m\n",
+ "\n",
+ " d1 0.315981 1.37407\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_confint(AJR_Xselect);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It is interesting to understand what the procedure above is doing. In essence, it partials out $x_i$ from\n",
+ "$y_i$,\n",
+ "$d_i$ and $z_i$ using Post-Lasso and applies 2SLS to the residual quantities.\n",
+ "Let us investigate partialling out in more detail in this example. We can first try to use OLS for\n",
+ "partialling out:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rY_1 = lm(hcat(ones(length(y)), Matrix(AJR[:,3:end])), y);\n",
+ "rY = y - predict(rY_1)\n",
+ "rD_1 = lm(hcat(ones(length(y)), Matrix(AJR[:,3:end])), d);\n",
+ "rD = d - predict(rD_1);\n",
+ "rZ_1 = lm(hcat(ones(length(y)), Matrix(AJR[:,3:end])), z);\n",
+ "rZ = z - predict(rZ_1);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "rY_1 = lm(@formula(GDP ~ Latitude + Latitude2 + Africa + Asia + Namer + Samer + Latitude*Latitude2 + Latitude*Africa + Latitude*Asia + Latitude*Namer + Latitude*Samer\n",
+ " + Latitude2*Africa + Latitude2*Asia + Latitude2*Namer + Latitude2*Samer + Africa*Asia + Africa*Namer + Africa*Samer\n",
+ " + Asia*Namer + Asia*Samer + Namer*Samer), AJR)\n",
+ "rY = y - predict(rY_1)\n",
+ "\n",
+ "rD_1 = lm(@formula(Exprop ~ Latitude + Latitude2 + Africa + Asia + Namer + Samer + Latitude*Latitude2 + Latitude*Africa + Latitude*Asia + Latitude*Namer + Latitude*Samer\n",
+ " + Latitude2*Africa + Latitude2*Asia + Latitude2*Namer + Latitude2*Samer + Africa*Asia + Africa*Namer + Africa*Samer\n",
+ " + Asia*Namer + Asia*Samer + Namer*Samer), AJR)\n",
+ "rD = d - predict(rD_1)\n",
+ "\n",
+ "rZ_1 = lm(@formula(logMort ~ Latitude + Latitude2 + Africa + Asia + Namer + Samer + Latitude*Latitude2 + Latitude*Africa + Latitude*Asia + Latitude*Namer + Latitude*Samer\n",
+ " + Latitude2*Africa + Latitude2*Asia + Latitude2*Namer + Latitude2*Samer + Africa*Asia + Africa*Namer + Africa*Samer\n",
+ " + Asia*Namer + Asia*Samer + Namer*Samer), AJR)\n",
+ "rZ = z - predict(rZ_1);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "1 rows × 2 columns
| Estimate | Std_Error |
| Float64 | Float64 |
1 | 1.26721 | 1.73054 |
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& Estimate & Std\\_Error\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 1.26721 & 1.73054 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "\u001b[1m1×2 DataFrame\u001b[0m\n",
+ "\u001b[1m Row \u001b[0m│\u001b[1m Estimate \u001b[0m\u001b[1m Std_Error \u001b[0m\n",
+ "\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n",
+ "─────┼─────────────────────\n",
+ " 1 │ 1.26721 1.73054"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "ivfit_lm = tsls(rD, rY, rZ, nothing, intercept=false)\n",
+ "DataFrame(Estimate = ivfit_lm[\"coefficients\"][1,2], Std_Error = ivfit_lm[\"se\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We see that the estimates exhibit large standard errors. The imprecision is expected because the dimension\n",
+ "of $x$ is quite large, comparable to the sample size.\n",
+ "Next, we replace the OLS operator by post-Lasso for partialling out:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "x_formula1 = @formula(GDP ~ Latitude + Latitude2 + Africa + Asia + Namer + Samer\n",
+ " + Latitude*Latitude2 + Latitude*Africa + Latitude*Asia + Latitude*Namer + Latitude*Samer\n",
+ " + Latitude2*Africa + Latitude2*Asia + Latitude2*Namer + Latitude2*Samer\n",
+ " + Africa*Asia + Africa*Namer + Africa*Samer\n",
+ " + Asia*Namer + Asia*Samer\n",
+ " + Namer*Samer)\n",
+ "x_dframe1 = ModelFrame( x_formula, AJR)\n",
+ "x1_1 = ModelMatrix(x_dframe)\n",
+ "xx = x1.m;"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "1 rows × 2 columns
| Estimate | Std_Error |
| Float64 | Float64 |
1 | 0.845027 | 0.272094 |
+ ],
+ "text/latex": [
+ "\\begin{tabular}{r|cc}\n",
+ "\t& Estimate & Std\\_Error\\\\\n",
+ "\t\\hline\n",
+ "\t& Float64 & Float64\\\\\n",
+ "\t\\hline\n",
+ "\t1 & 0.845027 & 0.272094 \\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/plain": [
+ "\u001b[1m1×2 DataFrame\u001b[0m\n",
+ "\u001b[1m Row \u001b[0m│\u001b[1m Estimate \u001b[0m\u001b[1m Std_Error \u001b[0m\n",
+ "\u001b[1m \u001b[0m│\u001b[90m Float64 \u001b[0m\u001b[90m Float64 \u001b[0m\n",
+ "─────┼─────────────────────\n",
+ " 1 │ 0.845027 0.272094"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "rY_1 = rlasso(xx, y);\n",
+ "rY = rY_1[\"residuals\"]\n",
+ "rD_1 = rlasso(xx, d);\n",
+ "rD = rD_1[\"residuals\"]\n",
+ "rZ_1 = rlasso(xx, z);\n",
+ "rZ = rZ_1[\"residuals\"]\n",
+ "\n",
+ "ivfit_lasso = tsls(rD, rY, rZ)\n",
+ "DataFrame(Estimate = ivfit_lasso[\"coefficients\"][1,2], Std_Error = ivfit_lasso[\"se\"][1])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.3. Application: Impact of Eminent Domain Decisions on Economic Outcomes."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, we load the data and construct the matrices with the controls (x), instruments (z), outcome (y),\n",
+ "and treatment variables (d). Here we consider regional GDP as the outcome variable."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = \"https://github.com/cran/hdm/raw/master/data/EminentDomain.rda\";\n",
+ "EminentDomain = load(download(url))[\"EminentDomain\"];\n",
+ "z = EminentDomain[\"logGDP\"][\"z\"];\n",
+ "x = EminentDomain[\"logGDP\"][\"x\"];\n",
+ "d = EminentDomain[\"logGDP\"][\"d\"];\n",
+ "y = EminentDomain[\"logGDP\"][\"y\"];\n",
+ "x = x[:, (mean(x, dims = 1) .> 0.05)'];\n",
+ "z = z[:, (mean(z, dims = 1) .> 0.05)'];"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As mentioned above, y is the economic outcome, the logarithm of the GDP, d is the number of pro-plaintiff\n",
+ "appellate takings decisions in federal circuit court c and year t, x is a matrix with control\n",
+ "variables, and z is the matrix with instruments. Here we consider socio-economic and demographic\n",
+ "characteristics of the judges as instruments.\n",
+ "First, we estimate the effect of the treatment variable by simple OLS and 2SLS using two instruments:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ED_ols = lm(hcat(ones(length(vec(y))), hcat(d, x)), vec(y));\n",
+ "ED_2sls = tsls(d, y, z[:,1:2], x, intercept = false);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Next, we estimate the model with selection on the instruments.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "lasso_IV_Z = rlassoIV(x, d, y, z, select_X = false, select_Z = true);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and Significance Testing of the effect of target variables in the IV regression model\n",
+ " \u001b[1m \u001b[0m \u001b[1m coeff. \u001b[0m \u001b[1m se. \u001b[0m \u001b[1m t-value \u001b[0m \u001b[1m p-value \u001b[0m\n",
+ "\n",
+ " d1 0.414602 0.290249 1.42843 0.153167\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_summary(lasso_IV_Z);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \u001b[1m \u001b[0m \u001b[1m 2.5% \u001b[0m \u001b[1m 97.5% \u001b[0m\n",
+ "\n",
+ " d1 -0.154276 0.98348\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_confint(lasso_IV_Z);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we do selection on both the x and z variables."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and Significance Testing of the effect of target variables in the IV regression model\n",
+ " \u001b[1m \u001b[0m \u001b[1m coeff. \u001b[0m \u001b[1m se. \u001b[0m \u001b[1m t-value \u001b[0m \u001b[1m p-value \u001b[0m\n",
+ "\n",
+ " d1 -0.0238347 0.128507 -0.185475 0.852857\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n"
+ ]
+ }
+ ],
+ "source": [
+ "lasso_IV_XZ = rlassoIV(x, d, y, z, select_X = true, select_Z = true);\n",
+ "r_summary(lasso_IV_XZ);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \u001b[1m \u001b[0m \u001b[1m 2.5% \u001b[0m \u001b[1m 97.5% \u001b[0m\n",
+ "\n",
+ " d1 -0.275703 0.228033\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_confint(lasso_IV_XZ);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we compare all results"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and Significance Testing of the effect of target variables in the IV regression model\n",
+ " \u001b[1m \u001b[0m \u001b[1m coeff. \u001b[0m \u001b[1m se. \u001b[0m \u001b[1m t-value \u001b[0m \u001b[1m p-value \u001b[0m\n",
+ "\n",
+ " d1 0.414602 0.290249 1.42843 0.153167\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "Estimates and Significance Testing of the effect of target variables in the IV regression model\n",
+ " \u001b[1m \u001b[0m \u001b[1m coeff. \u001b[0m \u001b[1m se. \u001b[0m \u001b[1m t-value \u001b[0m \u001b[1m p-value \u001b[0m\n",
+ "\n",
+ " d1 -0.0238347 0.128507 -0.185475 0.852857\n",
+ "---\n",
+ "Signif. codes:\n",
+ "0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "======================= ============ =============\n",
+ " \u001b[1m \u001b[0m \u001b[1m Estimate \u001b[0m \u001b[1m Std. Error \u001b[0m\n",
+ "======================= ============ =============\n",
+ " ols regression 0.00786473 0.00986593\n",
+ " IV estimation -0.0107097 0.0337652\n",
+ " selection on Z 0.414602 0.290249\n",
+ " selection on X and Z -0.0238347 0.128507\n",
+ "======================= ============ =============\n"
+ ]
+ }
+ ],
+ "source": [
+ "table = zeros(4,2)\n",
+ "table[1,:] = [coef(ED_ols)[2], stderror(ED_ols)[2]]\n",
+ "table[2,:] = [ED_2sls[\"coefficients\"][1,2], ED_2sls[\"se\"][1]]\n",
+ "table[3,:] = Matrix(r_summary(lasso_IV_Z)[:,2:3]);\n",
+ "table[4, :] = Matrix(r_summary(lasso_IV_XZ)[:, 2:3]);\n",
+ "index = [\"ols regression\", \"IV estimation \", \"selection on Z\", \"selection on X and Z\"]\n",
+ "pretty_table(hcat(index, table), show_row_number = false, header = [\" \", \"Estimate\", \"Std. Error\"], tf = tf_simple, nosubheader = true)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6. Inference on Treatment Effects in a High-Dimensional Setting"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 6.3. Application: 401(k) plan participation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Again, we start first with the data preparation:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = \"https://github.com/cran/hdm/raw/master/data/pension.rda\";\n",
+ "pension = load(download(url))[\"pension\"];\n",
+ "y = pension[:, \"tw\"];\n",
+ "d = pension[:, \"p401\"];\n",
+ "z = pension[:, \"e401\"];\n",
+ "X = Matrix(pension[:, [\"i2\", \"i3\", \"i4\", \"i5\", \"i6\", \"i7\", \"a2\", \"a3\", \"a4\", \"a5\", \"fsize\", \"hs\", \"smcol\", \"col\", \"marr\", \"twoearn\", \"db\", \"pira\", \"hown\"]]);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now we can compute the estimates of the target treatment effect parameters. For ATE and ATET we\n",
+ "report the effect of eligibility for 401(k)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " ------\n",
+ " Post-Lasso estimation: true\n",
+ " Intercept: true\n",
+ " Control: 0\n",
+ " Total number of variables: 19\n",
+ " Number of selected variables: 9 \n",
+ " ------\n",
+ " \n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept -2.07033\n",
+ " V 1 -0.237913\n",
+ " V 3 0.618819\n",
+ " V 4 0.846136\n",
+ " V 5 1.10569\n",
+ " V 6 1.34217\n",
+ " V 10 -0.33151\n",
+ " V 16 0.0382348\n",
+ " V 17 0.620232\n",
+ " V 18 0.335563\n",
+ "============ ============\n",
+ "rlassologit\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: ATE\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 10180.1 1930.68 5.2728\n",
+ "========== ========= ==========\n"
+ ]
+ }
+ ],
+ "source": [
+ "pension_ate = rlassoATE(X, d, y);\n",
+ "r_summary(pension_ate);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " ------\n",
+ " Post-Lasso estimation: true\n",
+ " Intercept: true\n",
+ " Control: 0\n",
+ " Total number of variables: 19\n",
+ " Number of selected variables: 6 \n",
+ " ------\n",
+ " \n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept -1.79587\n",
+ " V 1 -0.608675\n",
+ " V 5 0.622942\n",
+ " V 6 0.839653\n",
+ " V 16 0.199394\n",
+ " V 17 0.643286\n",
+ " V 18 0.374925\n",
+ "============ ============\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rlassologit\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: ATET\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 12628.5 2944.43 4.28893\n",
+ "========== ========= ==========\n"
+ ]
+ }
+ ],
+ "source": [
+ "pension_atet = rlassoATET(X, d, y);\n",
+ "r_summary(pension_atet);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For LATE and LATET we estimate the effect of 401(k) participation (d) with plan eligibility (z) as\n",
+ "instrument."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " ------\n",
+ " Post-Lasso estimation: true\n",
+ " Intercept: true\n",
+ " Control: 0\n",
+ " Total number of variables: 19\n",
+ " Number of selected variables: 10 \n",
+ " ------\n",
+ " \n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept -1.58403\n",
+ " V 1 -0.329602\n",
+ " V 3 0.657641\n",
+ " V 4 0.836492\n",
+ " V 5 1.11528\n",
+ " V 6 1.21348\n",
+ " V 8 0.142622\n",
+ " V 10 -0.299557\n",
+ " V 16 0.0516196\n",
+ " V 17 1.03219\n",
+ " V 18 0.135758\n",
+ "============ ============\n",
+ "rlassologit\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: LATE\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ======== ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ======== ==========\n",
+ " 12992.1 2326.9 5.58344\n",
+ "========== ======== ==========\n"
+ ]
+ }
+ ],
+ "source": [
+ "pension_late = rlassoLATE(X, d, y, z);\n",
+ "r_summary(pension_late);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " ------\n",
+ " Post-Lasso estimation: true\n",
+ " Intercept: true\n",
+ " Control: 0\n",
+ " Total number of variables: 19\n",
+ " Number of selected variables: 5 \n",
+ " ------\n",
+ " \n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept -1.25636\n",
+ " V 1 -0.714199\n",
+ " V 5 0.677564\n",
+ " V 6 0.794049\n",
+ " V 16 0.212127\n",
+ " V 17 1.05388\n",
+ "============ ============\n",
+ "rlassologit\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: LATET\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 15323.2 3645.28 4.20357\n",
+ "========== ========= ==========\n"
+ ]
+ }
+ ],
+ "source": [
+ "pension_latet = rlassoLATET(X, d, y, z);\n",
+ "r_summary(pension_latet);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The estimates of the four treatment effect parameters (ATE, ATET, LATE and LATET) are\n",
+ "summarized in a table:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 153,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: ATE\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 10180.1 1930.68 5.2728\n",
+ "========== ========= ==========\n",
+ "\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: ATET\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 12628.5 2944.43 4.28893\n",
+ "========== ========= ==========\n",
+ "\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: LATE\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ======== ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ======== ==========\n",
+ " 12992.1 2326.9 5.58344\n",
+ "========== ======== ==========\n",
+ "\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: LATET\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 15323.2 3645.28 4.20357\n",
+ "========== ========= ==========\n"
+ ]
+ }
+ ],
+ "source": [
+ "using PrettyTables\n",
+ "table = zeros(4,2)\n",
+ "table[1,:] = round.(vec(r_summary(pension_ate)[:, 1:2]), digits = 2);\n",
+ "table[2,:] = round.(vec(r_summary(pension_atet)[:, 1:2]), digits = 2);\n",
+ "table[3,:] = round.(vec(r_summary(pension_late)[:, 1:2]), digits = 2);\n",
+ "table[4,:] = round.(vec(r_summary(pension_latet)[:, 1:2]), digits = 2);\n",
+ "index = [\"ATE\", \"ATET \", \"LATE\", \"LATET\"];"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 152,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "======== ========== =============\n",
+ " \u001b[1m \u001b[0m \u001b[1m Estimate \u001b[0m \u001b[1m Std. Error \u001b[0m\n",
+ "======== ========== =============\n",
+ " ATE 10180.1 1930.68\n",
+ " ATET 12628.5 2944.43\n",
+ " LATE 12992.1 2326.9\n",
+ " LATET 15323.2 3645.28\n",
+ "======== ========== =============\n"
+ ]
+ }
+ ],
+ "source": [
+ "pretty_table(hcat(index, table), show_row_number = false, \n",
+ " header = [\" \", \"Estimate\", \"Std. Error\"], tf = tf_simple, nosubheader = true)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally, we estimate a model including all interaction effects:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 174,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " ------\n",
+ " Post-Lasso estimation: true\n",
+ " Intercept: true\n",
+ " Control: 0\n",
+ " Total number of variables: 19\n",
+ " Number of selected variables: 10 \n",
+ " ------\n",
+ " \n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept -1.58403\n",
+ " V 1 -0.329602\n",
+ " V 3 0.657641\n",
+ " V 4 0.836492\n",
+ " V 5 1.11528\n",
+ " V 6 1.21348\n",
+ " V 8 0.142622\n",
+ " V 10 -0.299557\n",
+ " V 16 0.0516196\n",
+ " V 17 1.03219\n",
+ " V 18 0.135758\n",
+ "============ ============\n",
+ "rlassologit"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " ------\n",
+ " Post-Lasso estimation: true\n",
+ " Intercept: true\n",
+ " Control: 0\n",
+ " Total number of variables: 19\n",
+ " Number of selected variables: 5 \n",
+ " ------\n",
+ " \n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept -1.25636\n",
+ " V 1 -0.714199\n",
+ " V 5 0.677564\n",
+ " V 6 0.794049\n",
+ " V 16 0.212127\n",
+ " V 17 1.05388\n",
+ "============ ============\n",
+ "rlassologit"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " ------\n",
+ " Post-Lasso estimation: true\n",
+ " Intercept: true\n",
+ " Control: 0\n",
+ " Total number of variables: 19\n",
+ " Number of selected variables: 10 \n",
+ " ------\n",
+ " \n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept -1.58403\n",
+ " V 1 -0.329602\n",
+ " V 3 0.657641\n",
+ " V 4 0.836492\n",
+ " V 5 1.11528\n",
+ " V 6 1.21348\n",
+ " V 8 0.142622\n",
+ " V 10 -0.299557\n",
+ " V 16 0.0516196\n",
+ " V 17 1.03219\n",
+ " V 18 0.135758\n",
+ "============ ============\n",
+ "rlassologit"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " ------\n",
+ " Post-Lasso estimation: true\n",
+ " Intercept: true\n",
+ " Control: 0\n",
+ " Total number of variables: 19\n",
+ " Number of selected variables: 5 \n",
+ " ------\n",
+ " \n",
+ " \n",
+ "============ ============\n",
+ " \u001b[1m Variable \u001b[0m \u001b[1m Estimate \u001b[0m\n",
+ "============ ============\n",
+ " Intercept -1.25636\n",
+ " V 1 -0.714199\n",
+ " V 5 0.677564\n",
+ " V 6 0.794049\n",
+ " V 16 0.212127\n",
+ " V 17 1.05388\n",
+ "============ ============\n",
+ "rlassologit"
+ ]
+ }
+ ],
+ "source": [
+ "pension_ate = rlassoATE(X, z, y);\n",
+ "pension_atet = rlassoATET(X, z, y);\n",
+ "pension_late = rlassoLATE(X, d, y, z);\n",
+ "pension_latet = rlassoLATET(X, d, y, z);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 182,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: ATE\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 8491.99 1902.92 4.4626\n",
+ "========== ========= ==========\n",
+ "\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: ATET\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 10795.3 2568.13 4.20357\n",
+ "========== ========= ==========\n",
+ "\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: LATE\n",
+ " Bootstrap: none\n",
+ " \n",
+ "========== ======== ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ======== ==========\n",
+ " 12992.1 2326.9 5.58344\n",
+ "========== ======== ==========\n",
+ "\n",
+ " Estimation and significance tesing of the treatment effect\n",
+ " Type: LATET\n",
+ " Bootstrap: none\n",
+ " \n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "========== ========= ==========\n",
+ " \u001b[1m Coeff \u001b[0m \u001b[1m SE \u001b[0m \u001b[1m t.value \u001b[0m\n",
+ "========== ========= ==========\n",
+ " 15323.2 3645.28 4.20357\n",
+ "========== ========= ==========\n"
+ ]
+ }
+ ],
+ "source": [
+ "table = zeros(4, 2)\n",
+ "table[1,:] = r_summary(pension_ate)[:, 1:2]\n",
+ "table[2,:] = r_summary(pension_atet)[:, 1:2]\n",
+ "table[3,:] = r_summary(pension_late)[:, 1:2]\n",
+ "table[4,:] = r_summary(pension_latet)[:, 1:2];"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 183,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "======== ========== =============\n",
+ " \u001b[1m \u001b[0m \u001b[1m Estimate \u001b[0m \u001b[1m Std. Error \u001b[0m\n",
+ "======== ========== =============\n",
+ " ATE 8491.99 1902.92\n",
+ " ATET 10795.3 2568.13\n",
+ " LATE 12992.1 2326.9\n",
+ " LATET 15323.2 3645.28\n",
+ "======== ========== =============\n"
+ ]
+ }
+ ],
+ "source": [
+ "index = [\"ATE\", \"ATET \", \"LATE\", \"LATET\"]\n",
+ "pretty_table(hcat(index, table), show_row_number = false, \n",
+ " header = [\" \", \"Estimate\", \"Std. Error\"], tf = tf_simple, nosubheader = true)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 7. The Lasso Methods for Discovery of Significant Causes amongst Many Potential Causes, with Many Controls\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "url = \"https://raw.githubusercontent.com/d2cml-ai/HDMjl.jl/prueba/data/7_.csv\"\n",
+ "data = DataFrame(CSV.read(download(url), DataFrame));\n",
+ "n, p = size(data);\n",
+ "p1 = 20;\n",
+ "X = data[:,2:end]\n",
+ "Y = data[:,1];"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \u001b[1m \u001b[0m \u001b[1m 2.5% \u001b[0m \u001b[1m 97.5% \u001b[0m\n",
+ "\n",
+ " \u001b[1m X1 \u001b[0m 4.51533 5.21356\n",
+ " \u001b[1m X2 \u001b[0m -0.313634 0.30429\n",
+ " \u001b[1m X3 \u001b[0m -0.351839 0.186217\n",
+ " \u001b[1m X4 \u001b[0m -0.253669 0.286815\n",
+ " \u001b[1m X5 \u001b[0m -0.275994 0.275686\n",
+ " \u001b[1m X6 \u001b[0m -0.320815 0.293574\n",
+ " \u001b[1m X7 \u001b[0m -0.225692 0.300383\n",
+ " \u001b[1m X8 \u001b[0m -0.0468017 0.473111\n",
+ " \u001b[1m X9 \u001b[0m -0.185952 0.389624\n",
+ " \u001b[1m X10 \u001b[0m -0.236704 0.26358\n",
+ " \u001b[1m X11 \u001b[0m -0.314153 0.208903\n",
+ " \u001b[1m X12 \u001b[0m -0.308581 0.265112\n",
+ " \u001b[1m X13 \u001b[0m -0.173571 0.376241\n",
+ " \u001b[1m X14 \u001b[0m -0.322822 0.38468\n",
+ " \u001b[1m X15 \u001b[0m -0.321303 0.312452\n",
+ " \u001b[1m X16 \u001b[0m -0.264319 0.330375\n",
+ " \u001b[1m X17 \u001b[0m -0.178576 0.41633\n",
+ " \u001b[1m X18 \u001b[0m -0.368883 0.046518\n",
+ " \u001b[1m X19 \u001b[0m -0.10678 0.393157\n",
+ " \u001b[1m X20 \u001b[0m -0.215219 0.254939\n"
+ ]
+ }
+ ],
+ "source": [
+ "r_confint(rlassoEffects(X, Y, index = [1:p1;]), joint = true);"
+ ]
"metadata": {
diff --git a/tutorial/r_file.ipynb b/tutorial/r_file.ipynb
new file mode 100644
index 0000000..1d9ec5e
--- /dev/null
+++ b/tutorial/r_file.ipynb
@@ -0,0 +1,2027 @@
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 2. How to get started"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Warning message:\n",
+ "\"package 'hdm' was built under R version 4.2.1\"\n",
+ "Warning message:\n",
+ "\"package 'glmnet' was built under R version 4.2.1\"\n",
+ "Loading required package: Matrix\n",
+ "\n",
+ "Loaded glmnet 4.1-4\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "library(hdm)\n",
+ "library(stats)\n",
+ "library(glmnet)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.1. Prediction in Linear Models using Approximate Sparsity"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 3.2. A Joint Significance Test for Lasso Regression."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "set.seed(12345)\n",
+ "n = 100 #sample size\n",
+ "p = 100 # number of variables\n",
+ "s = 3 # number of variables with non-zero coefficients\n",
+ "X = matrix(rnorm(n * p), ncol = p)\n",
+ "beta = c(rep(5, s), rep(0, p - s))\n",
+ "Y = X %*% beta + rnorm(n)\n",
+ "#data = data.frame(cbind(Y, X))\n",
+ "#write.csv(data,\"../data/3_2.csv\", row.names = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Call:\n",
+ "rlasso.formula(formula = Y ~ X, post = FALSE)\n",
+ "\n",
+ "Post-Lasso Estimation: FALSE \n",
+ "\n",
+ "Total number of variables: 100\n",
+ "Number of selected variables: 11 \n",
+ "\n",
+ "Residuals: \n",
+ " Min 1Q Median 3Q Max \n",
+ "-2.09008 -0.45801 -0.01237 0.50291 2.25098 \n",
+ "\n",
+ " Estimate\n",
+ "(Intercept) 0.057\n",
+ "1 4.771\n",
+ "2 4.693\n",
+ "3 4.766\n",
+ "13 -0.045\n",
+ "15 -0.047\n",
+ "16 -0.005\n",
+ "19 -0.092\n",
+ "22 -0.027\n",
+ "40 -0.011\n",
+ "61 0.114\n",
+ "100 -0.025\n",
+ "\n",
+ "Residual standard error: 0.8039\n",
+ "Multiple R-squared: 0.9913\n",
+ "Adjusted R-squared: 0.9902\n",
+ "Joint significance test:\n",
+ " the sup score statistic for joint significance test is 64.02 with a p-value of 0\n"
+ ]
+ }
+ ],
+ "source": [
+ "lasso.reg = rlasso(Y ~ X, post = FALSE) # use lasso, not-Post-lasso\n",
+ "# lasso.reg = rlasso(X, Y, post=FALSE)\n",
+ "sum.lasso <- summary(lasso.reg, all = FALSE) # can also do print(lasso.reg, all=FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "set.seed(123)\n",
+ "yhat.lasso = predict(lasso.reg) #in-sample prediction\n",
+ "Xnew = matrix(rnorm(n * p), ncol = p) # new X\n",
+ "Ynew = Xnew %*% beta + rnorm(n) #new Y\n",
+ "yhat.lasso.new = predict(lasso.reg, newdata = Xnew) #out-of-sample prediction\n",
+ "# data = data.frame(cbind(Ynew, Xnew))\n",
+ "# write.csv(data,\"../data/3_2_2.csv\", row.names = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Call:\n",
+ "rlasso.formula(formula = Y ~ X, post = TRUE)\n",
+ "\n",
+ "(Intercept) 1 2 3 \n",
+ " 0.0341 4.9241 4.8579 4.9644 \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "post.lasso.reg = rlasso(Y ~ X, post = TRUE) #now use post-lasso\n",
+ "print(post.lasso.reg, all = FALSE) # or use summary(post.lasso.reg, all=FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " lasso MAE Post-lasso MAE \n",
+ " 0.88 0.78 \n"
+ ]
+ }
+ ],
+ "source": [
+ "yhat.postlasso = predict(post.lasso.reg) #in-sample prediction\n",
+ "yhat.postlasso.new = predict(post.lasso.reg, newdata = Xnew) #out-of-sample prediction\n",
+ "MAE <- apply(cbind(abs(Ynew - yhat.lasso.new), abs(Ynew - yhat.postlasso.new)), 2,\n",
+ "mean)\n",
+ "names(MAE) <- c(\"lasso MAE\", \"Post-lasso MAE\")\n",
+ "print(MAE, digits = 2) # MAE for Lasso and Post-Lasso"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 4. Inference on Target Regression Coefficients"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.1. Intuition for the Orthogonality Principle in Linear Models via Partialling Out."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "set.seed(1)\n",
+ "n = 5000\n",
+ "p = 20\n",
+ "X = matrix(rnorm(n * p), ncol = p)\n",
+ "colnames(X) = c(\"d\", paste(\"x\", 1:19, sep = \"\"))\n",
+ "xnames = colnames(X)[-1]\n",
+ "beta = rep(1, 20)\n",
+ "y = X %*% beta + rnorm(n)\n",
+ "dat = data.frame(y = y, X)\n",
+ "#save(dat, file = \"../data/4_1.csv\")\n",
+ "#write.csv(dat,\"../data/4_1.csv\", row.names = FALSE)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "- Estimate
- 0.978074548374755
- Std. Error
- 0.0137122468163772
+ ],
+ "text/latex": [
+ "\\begin{description*}\n",
+ "\\item[Estimate] 0.978074548374755\n",
+ "\\item[Std. Error] 0.0137122468163772\n",
+ "\\end{description*}\n"
+ ],
+ "text/markdown": [
+ "Estimate\n",
+ ": 0.978074548374755Std. Error\n",
+ ": 0.0137122468163772\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Estimate Std. Error \n",
+ "0.97807455 0.01371225 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# full fit\n",
+ "fmla = as.formula(paste(\"y ~ \", paste(colnames(X), collapse = \"+\")))\n",
+ "full.fit = lm(fmla, data = dat)\n",
+ "summary(full.fit)$coef[\"d\", 1:2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "- Estimate
- 0.978074548374761
- Std. Error
- 0.0136861583043357
+ ],
+ "text/latex": [
+ "\\begin{description*}\n",
+ "\\item[Estimate] 0.978074548374761\n",
+ "\\item[Std. Error] 0.0136861583043357\n",
+ "\\end{description*}\n"
+ ],
+ "text/markdown": [
+ "Estimate\n",
+ ": 0.978074548374761Std. Error\n",
+ ": 0.0136861583043357\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Estimate Std. Error \n",
+ "0.97807455 0.01368616 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "fmla.y = as.formula(paste(\"y ~ \", paste(xnames, collapse = \"+\")))\n",
+ "fmla.d = as.formula(paste(\"d ~ \", paste(xnames, collapse = \"+\")))\n",
+ "# partial fit via ols\n",
+ "rY = lm(fmla.y, data = dat)$res\n",
+ "rD = lm(fmla.d, data = dat)$res\n",
+ "partial.fit.ls = lm(rY ~ rD)\n",
+ "summary(partial.fit.ls)$coef[\"rD\", 1:2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "- Estimate
- 0.972738704584573
- Std. Error
- 0.0136867656564768
+ ],
+ "text/latex": [
+ "\\begin{description*}\n",
+ "\\item[Estimate] 0.972738704584573\n",
+ "\\item[Std. Error] 0.0136867656564768\n",
+ "\\end{description*}\n"
+ ],
+ "text/markdown": [
+ "Estimate\n",
+ ": 0.972738704584573Std. Error\n",
+ ": 0.0136867656564768\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Estimate Std. Error \n",
+ "0.97273870 0.01368677 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "rY = rlasso(fmla.y, data = dat)$res\n",
+ "rD = rlasso(fmla.d, data = dat)$res\n",
+ "partial.fit.postlasso = lm(rY ~ rD)\n",
+ "summary(partial.fit.postlasso)$coef[\"rD\", 1:2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
+ "A matrix: 1 × 4 of type dbl\n",
+ "\n",
+ "\tEstimate. | Std. Error | t value | Pr(>|t|) |
+ "\n",
+ "\n",
+ "\t0.9727387 | 0.01368677 | 71.07148 | 0 |
+ "\n",
+ "
+ ],
+ "text/latex": [
+ "A matrix: 1 × 4 of type dbl\n",
+ "\\begin{tabular}{llll}\n",
+ " Estimate. & Std. Error & t value & Pr(>\\textbar{}t\\textbar{})\\\\\n",
+ "\\hline\n",
+ "\t 0.9727387 & 0.01368677 & 71.07148 & 0\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 1 × 4 of type dbl\n",
+ "\n",
+ "| Estimate. | Std. Error | t value | Pr(>|t|) |\n",
+ "|---|---|---|---|\n",
+ "| 0.9727387 | 0.01368677 | 71.07148 | 0 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Estimate. Std. Error t value Pr(>|t|)\n",
+ "[1,] 0.9727387 0.01368677 71.07148 0 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "Eff = rlassoEffect(X[, -1], y, X[, 1], method = \"partialling out\")\n",
+ "summary(Eff)$coef"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "- Estimate.
- 0.978074548374755
- Std. Error
- 0.0141562427002124
+ ],
+ "text/latex": [
+ "\\begin{description*}\n",
+ "\\item[Estimate.] 0.978074548374755\n",
+ "\\item[Std. Error] 0.0141562427002124\n",
+ "\\end{description*}\n"
+ ],
+ "text/markdown": [
+ "Estimate.\n",
+ ": 0.978074548374755Std. Error\n",
+ ": 0.0141562427002124\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Estimate. Std. Error \n",
+ "0.97807455 0.01415624 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "Eff = rlassoEffect(X[, -1], y, X[, 1], method = \"double selection\")\n",
+ "summary(Eff)$coef[, 1:2]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.2. Inference: Confidence Intervals and Significance Testing. The function rlassoEffects"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "set.seed(1)\n",
+ "n = 100 #sample size\n",
+ "p = 100 # number of variables\n",
+ "s = 3 # number of non-zero variables\n",
+ "X = matrix(rnorm(n * p), ncol = p)\n",
+ "colnames(X) <- paste(\"X\", 1:p, sep = \"\")\n",
+ "beta = c(rep(3, s), rep(0, p - s))\n",
+ "y = 1 + X %*% beta + rnorm(n)\n",
+ "data = data.frame(cbind(y, X))\n",
+ "#write.csv(data,\"../data/4_2.csv\", row.names = FALSE)\n",
+ "colnames(data)[1] <- \"y\"\n",
+ "fm = paste(\"y ~\", paste(colnames(X), collapse = \"+\"))\n",
+ "fm = as.formula(fm)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Call:\n",
+ "rlassoEffects.formula(formula = fm, data = data, I = ~X1 + X2 + \n",
+ " X3 + X50)\n",
+ "\n",
+ "Coefficients:\n",
+ " X1 X2 X3 X50 \n",
+ "2.94448 3.04127 2.97540 0.07196 \n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# lasso.effect = rlassoEffects(X, y, index=c(1,2,3,50))\n",
+ "lasso.effect = rlassoEffects(fm, I = ~X1 + X2 + X3 + X50, data = data)\n",
+ "print(lasso.effect)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[1] \"Estimates and significance testing of the effect of target variables\"\n",
+ " Estimate. Std. Error t value Pr(>|t|) \n",
+ "X1 2.94448 0.08815 33.404 <2e-16 ***\n",
+ "X2 3.04127 0.08389 36.253 <2e-16 ***\n",
+ "X3 2.97540 0.07804 38.127 <2e-16 ***\n",
+ "X50 0.07196 0.07765 0.927 0.354 \n",
+ "---\n",
+ "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "summary(lasso.effect)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A matrix: 4 × 2 of type dbl\n",
+ "\n",
+ "\t | 2.5 % | 97.5 % |
+ "\n",
+ "\n",
+ "\tX1 | 2.77171308 | 3.1172421 |
+ "\tX2 | 2.87685121 | 3.2056979 |
+ "\tX3 | 2.82244962 | 3.1283583 |
+ "\tX50 | -0.08022708 | 0.2241377 |
+ "\n",
+ "
+ ],
+ "text/latex": [
+ "A matrix: 4 × 2 of type dbl\n",
+ "\\begin{tabular}{r|ll}\n",
+ " & 2.5 \\% & 97.5 \\%\\\\\n",
+ "\\hline\n",
+ "\tX1 & 2.77171308 & 3.1172421\\\\\n",
+ "\tX2 & 2.87685121 & 3.2056979\\\\\n",
+ "\tX3 & 2.82244962 & 3.1283583\\\\\n",
+ "\tX50 & -0.08022708 & 0.2241377\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 4 × 2 of type dbl\n",
+ "\n",
+ "| | 2.5 % | 97.5 % |\n",
+ "|---|---|---|\n",
+ "| X1 | 2.77171308 | 3.1172421 |\n",
+ "| X2 | 2.87685121 | 3.2056979 |\n",
+ "| X3 | 2.82244962 | 3.1283583 |\n",
+ "| X50 | -0.08022708 | 0.2241377 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " 2.5 % 97.5 % \n",
+ "X1 2.77171308 3.1172421\n",
+ "X2 2.87685121 3.2056979\n",
+ "X3 2.82244962 3.1283583\n",
+ "X50 -0.08022708 0.2241377"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "confint(lasso.effect)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A matrix: 4 × 2 of type dbl\n",
+ "\n",
+ "\t | 2.5 % | 97.5 % |
+ "\n",
+ "\n",
+ "\tX1 | 2.7357949 | 3.1531603 |
+ "\tX2 | 2.8445200 | 3.2380291 |
+ "\tX3 | 2.7902789 | 3.1605290 |
+ "\tX50 | -0.1086592 | 0.2525698 |
+ "\n",
+ "
+ ],
+ "text/latex": [
+ "A matrix: 4 × 2 of type dbl\n",
+ "\\begin{tabular}{r|ll}\n",
+ " & 2.5 \\% & 97.5 \\%\\\\\n",
+ "\\hline\n",
+ "\tX1 & 2.7357949 & 3.1531603\\\\\n",
+ "\tX2 & 2.8445200 & 3.2380291\\\\\n",
+ "\tX3 & 2.7902789 & 3.1605290\\\\\n",
+ "\tX50 & -0.1086592 & 0.2525698\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 4 × 2 of type dbl\n",
+ "\n",
+ "| | 2.5 % | 97.5 % |\n",
+ "|---|---|---|\n",
+ "| X1 | 2.7357949 | 3.1531603 |\n",
+ "| X2 | 2.8445200 | 3.2380291 |\n",
+ "| X3 | 2.7902789 | 3.1605290 |\n",
+ "| X50 | -0.1086592 | 0.2525698 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " 2.5 % 97.5 % \n",
+ "X1 2.7357949 3.1531603\n",
+ "X2 2.8445200 3.2380291\n",
+ "X3 2.7902789 3.1605290\n",
+ "X50 -0.1086592 0.2525698"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "confint(lasso.effect, level = 0.95, joint = TRUE)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.3. Application: the effect of gender on wage."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 29217
- 136
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 29217\n",
+ "\\item 136\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 29217\n",
+ "2. 136\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 29217 136"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 29217
- 116
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 29217\n",
+ "\\item 116\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 29217\n",
+ "2. 116\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 29217 116"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "library(hdm)\n",
+ "data(cps2012)\n",
+ "X <- model.matrix(~-1 + female + female:(widowed + divorced + separated + nevermarried +\n",
+ "hsd08 + hsd911 + hsg + cg + ad + mw + so + we + exp1 + exp2 + exp3) + +(widowed +\n",
+ "divorced + separated + nevermarried + hsd08 + hsd911 + hsg + cg + ad + mw + so +\n",
+ "we + exp1 + exp2 + exp3)^2, data = cps2012)\n",
+ "dim(X)\n",
+ "## [1] 29217 136\n",
+ "X <- X[, which(apply(X, 2, var) != 0)] # exclude all constant variables\n",
+ "dim(X)\n",
+ "## [1] 29217 116\n",
+ "index.gender <- grep(\"female\", colnames(X))\n",
+ "y <- cps2012$lnw"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[1] \"Estimates and significance testing of the effect of target variables\"\n",
+ " Estimate. Std. Error t value Pr(>|t|) \n",
+ "female -0.154923 0.050162 -3.088 0.002012 ** \n",
+ "female:widowed 0.136095 0.090663 1.501 0.133325 \n",
+ "female:divorced 0.136939 0.022182 6.174 6.68e-10 ***\n",
+ "female:separated 0.023303 0.053212 0.438 0.661441 \n",
+ "female:nevermarried 0.186853 0.019942 9.370 < 2e-16 ***\n",
+ "female:hsd08 0.027810 0.120914 0.230 0.818092 \n",
+ "female:hsd911 -0.119335 0.051880 -2.300 0.021435 * \n",
+ "female:hsg -0.012890 0.019223 -0.671 0.502518 \n",
+ "female:cg 0.010139 0.018327 0.553 0.580114 \n",
+ "female:ad -0.030464 0.021806 -1.397 0.162405 \n",
+ "female:mw -0.001063 0.019192 -0.055 0.955811 \n",
+ "female:so -0.008183 0.019357 -0.423 0.672468 \n",
+ "female:we -0.004226 0.021168 -0.200 0.841760 \n",
+ "female:exp1 0.004935 0.007804 0.632 0.527139 \n",
+ "female:exp2 -0.159519 0.045300 -3.521 0.000429 ***\n",
+ "female:exp3 0.038451 0.007861 4.891 1.00e-06 ***\n",
+ "---\n",
+ "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "effects.female <- rlassoEffects(x = X, y = y, index = index.gender)\n",
+ "summary(effects.female)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A matrix: 16 × 2 of type dbl\n",
+ "\n",
+ "\t | 2.5 % | 97.5 % |
+ "\n",
+ "\n",
+ "\tfemale | -0.29478224 | -0.01506433 |
+ "\tfemale:widowed | -0.13475121 | 0.40694218 |
+ "\tfemale:divorced | 0.07454815 | 0.19933062 |
+ "\tfemale:separated | -0.11727723 | 0.16388275 |
+ "\tfemale:nevermarried | 0.12902723 | 0.24467974 |
+ "\tfemale:hsd08 | -0.37611300 | 0.43173362 |
+ "\tfemale:hsd911 | -0.26962418 | 0.03095410 |
+ "\tfemale:hsg | -0.06534868 | 0.03956912 |
+ "\tfemale:cg | -0.04188922 | 0.06216633 |
+ "\tfemale:ad | -0.09609866 | 0.03517117 |
+ "\tfemale:mw | -0.05492271 | 0.05279584 |
+ "\tfemale:so | -0.06293909 | 0.04657240 |
+ "\tfemale:we | -0.06587457 | 0.05742232 |
+ "\tfemale:exp1 | -0.01658120 | 0.02645172 |
+ "\tfemale:exp2 | -0.28464359 | -0.03439506 |
+ "\tfemale:exp3 | 0.01676128 | 0.06013987 |
+ "\n",
+ "
+ ],
+ "text/latex": [
+ "A matrix: 16 × 2 of type dbl\n",
+ "\\begin{tabular}{r|ll}\n",
+ " & 2.5 \\% & 97.5 \\%\\\\\n",
+ "\\hline\n",
+ "\tfemale & -0.29478224 & -0.01506433\\\\\n",
+ "\tfemale:widowed & -0.13475121 & 0.40694218\\\\\n",
+ "\tfemale:divorced & 0.07454815 & 0.19933062\\\\\n",
+ "\tfemale:separated & -0.11727723 & 0.16388275\\\\\n",
+ "\tfemale:nevermarried & 0.12902723 & 0.24467974\\\\\n",
+ "\tfemale:hsd08 & -0.37611300 & 0.43173362\\\\\n",
+ "\tfemale:hsd911 & -0.26962418 & 0.03095410\\\\\n",
+ "\tfemale:hsg & -0.06534868 & 0.03956912\\\\\n",
+ "\tfemale:cg & -0.04188922 & 0.06216633\\\\\n",
+ "\tfemale:ad & -0.09609866 & 0.03517117\\\\\n",
+ "\tfemale:mw & -0.05492271 & 0.05279584\\\\\n",
+ "\tfemale:so & -0.06293909 & 0.04657240\\\\\n",
+ "\tfemale:we & -0.06587457 & 0.05742232\\\\\n",
+ "\tfemale:exp1 & -0.01658120 & 0.02645172\\\\\n",
+ "\tfemale:exp2 & -0.28464359 & -0.03439506\\\\\n",
+ "\tfemale:exp3 & 0.01676128 & 0.06013987\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 16 × 2 of type dbl\n",
+ "\n",
+ "| | 2.5 % | 97.5 % |\n",
+ "|---|---|---|\n",
+ "| female | -0.29478224 | -0.01506433 |\n",
+ "| female:widowed | -0.13475121 | 0.40694218 |\n",
+ "| female:divorced | 0.07454815 | 0.19933062 |\n",
+ "| female:separated | -0.11727723 | 0.16388275 |\n",
+ "| female:nevermarried | 0.12902723 | 0.24467974 |\n",
+ "| female:hsd08 | -0.37611300 | 0.43173362 |\n",
+ "| female:hsd911 | -0.26962418 | 0.03095410 |\n",
+ "| female:hsg | -0.06534868 | 0.03956912 |\n",
+ "| female:cg | -0.04188922 | 0.06216633 |\n",
+ "| female:ad | -0.09609866 | 0.03517117 |\n",
+ "| female:mw | -0.05492271 | 0.05279584 |\n",
+ "| female:so | -0.06293909 | 0.04657240 |\n",
+ "| female:we | -0.06587457 | 0.05742232 |\n",
+ "| female:exp1 | -0.01658120 | 0.02645172 |\n",
+ "| female:exp2 | -0.28464359 | -0.03439506 |\n",
+ "| female:exp3 | 0.01676128 | 0.06013987 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " 2.5 % 97.5 % \n",
+ "female -0.29478224 -0.01506433\n",
+ "female:widowed -0.13475121 0.40694218\n",
+ "female:divorced 0.07454815 0.19933062\n",
+ "female:separated -0.11727723 0.16388275\n",
+ "female:nevermarried 0.12902723 0.24467974\n",
+ "female:hsd08 -0.37611300 0.43173362\n",
+ "female:hsd911 -0.26962418 0.03095410\n",
+ "female:hsg -0.06534868 0.03956912\n",
+ "female:cg -0.04188922 0.06216633\n",
+ "female:ad -0.09609866 0.03517117\n",
+ "female:mw -0.05492271 0.05279584\n",
+ "female:so -0.06293909 0.04657240\n",
+ "female:we -0.06587457 0.05742232\n",
+ "female:exp1 -0.01658120 0.02645172\n",
+ "female:exp2 -0.28464359 -0.03439506\n",
+ "female:exp3 0.01676128 0.06013987"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "joint.CI <- confint(effects.female, level = 0.95, joint = TRUE)\n",
+ "joint.CI"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 4.4. Application: Estimation of the treatment effect in a linear model with many confounding factors"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 90
- 63
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 90\n",
+ "\\item 63\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 90\n",
+ "2. 63\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 90 63"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "data(GrowthData)\n",
+ "dim(GrowthData)\n",
+ "## [1] 90 63\n",
+ "y = GrowthData[, 1, drop = F]\n",
+ "d = GrowthData[, 3, drop = F]\n",
+ "X = as.matrix(GrowthData)[, -c(1, 2, 3)]\n",
+ "varnames = colnames(GrowthData)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "xnames = varnames[-c(1, 2, 3)] # names of X variables\n",
+ "dandxnames = varnames[-c(1, 2)] # names of D and X variables\n",
+ "# create formulas by pasting names (this saves typing time)\n",
+ "fmla = as.formula(paste(\"Outcome ~ \", paste(dandxnames, collapse = \"+\")))\n",
+ "ls.effect = lm(fmla, data = GrowthData)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[1] \"Estimates and significance testing of the effect of target variables\"\n",
+ " Estimate. Std. Error t value Pr(>|t|) \n",
+ "[1,] -0.04981 0.01394 -3.574 0.000351 ***\n",
+ "---\n",
+ "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "dX = as.matrix(cbind(d, X))\n",
+ "lasso.effect = rlassoEffect(x = X, y = y, d = d, method = \"partialling out\")\n",
+ "summary(lasso.effect)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[1] \"Estimates and significance testing of the effect of target variables\"\n",
+ " Estimate. Std. Error t value Pr(>|t|) \n",
+ "gdpsh465 -0.05001 0.01579 -3.167 0.00154 **\n",
+ "---\n",
+ "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "doublesel.effect = rlassoEffect(x = X, y = y, d = d, method = \"double selection\")\n",
+ "summary(doublesel.effect)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A xtable: 3 × 2\n",
+ "\n",
+ "\t | Estimate | Std. Error |
+ "\t | <dbl> | <dbl> |
+ "\n",
+ "\n",
+ "\tfull reg via ols | -0.009377989 | 0.02988773 |
+ "\tpartial reg\n",
+ "via post-lasso | -0.049811465 | 0.01393636 |
+ "\tpartial reg via double selection | -0.050005855 | 0.01579138 |
+ "\n",
+ "
+ ],
+ "text/latex": [
+ "A xtable: 3 × 2\n",
+ "\\begin{tabular}{r|ll}\n",
+ " & Estimate & Std. Error\\\\\n",
+ " & & \\\\\n",
+ "\\hline\n",
+ "\tfull reg via ols & -0.009377989 & 0.02988773\\\\\n",
+ "\tpartial reg\n",
+ "via post-lasso & -0.049811465 & 0.01393636\\\\\n",
+ "\tpartial reg via double selection & -0.050005855 & 0.01579138\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A xtable: 3 × 2\n",
+ "\n",
+ "| | Estimate <dbl> | Std. Error <dbl> |\n",
+ "|---|---|---|\n",
+ "| full reg via ols | -0.009377989 | 0.02988773 |\n",
+ "| partial reg\n",
+ "via post-lasso | -0.049811465 | 0.01393636 |\n",
+ "| partial reg via double selection | -0.050005855 | 0.01579138 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Estimate Std. Error\n",
+ "full reg via ols -0.009377989 0.02988773\n",
+ "partial reg\\nvia post-lasso -0.049811465 0.01393636\n",
+ "partial reg via double selection -0.050005855 0.01579138"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "library(xtable)\n",
+ "table = rbind(summary(ls.effect)$coef[\"gdpsh465\", 1:2], summary(lasso.effect)$coef[,\n",
+ "1:2], summary(doublesel.effect)$coef[, 1:2])\n",
+ "colnames(table) = c(\"Estimate\", \"Std. Error\") #names(summary(full.fit)$coef)[1:2]\n",
+ "rownames(table) = c(\"full reg via ols\", \"partial reg\n",
+ "via post-lasso \", \"partial reg via double selection\")\n",
+ "tab = xtable(table, digits = c(2, 2, 5))\n",
+ "tab"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 5. Instrumental Variable Estimation in a High-Dimensional Setting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.2. Application: Economic Development and Institutions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "- 64
- 21
+ ],
+ "text/latex": [
+ "\\begin{enumerate*}\n",
+ "\\item 64\n",
+ "\\item 21\n",
+ "\\end{enumerate*}\n"
+ ],
+ "text/markdown": [
+ "1. 64\n",
+ "2. 21\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ "[1] 64 21"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "data(AJR)\n",
+ "y = AJR$GDP\n",
+ "d = AJR$Exprop\n",
+ "z = AJR$logMort\n",
+ "x = model.matrix(~-1 + (Latitude + Latitude2 + Africa + Asia + Namer + Samer)^2,\n",
+ "data = AJR)\n",
+ "dim(x)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1] \"Estimation and significance testing of the effect of target variables in the IV regression model\"\n",
+ " coeff. se. t-value p-value \n",
+ "Exprop 0.8450 0.2699 3.131 0.00174 **\n",
+ "---\n",
+ "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "AJR.Xselect = rlassoIV(GDP ~ Exprop + (Latitude + Latitude2 + Africa + Asia + Namer +\n",
+ "Samer)^2 | logMort + (Latitude + Latitude2 + Africa + Asia + Namer + Samer)^2,\n",
+ "data = AJR, select.X = TRUE, select.Z = FALSE)\n",
+ "summary(AJR.Xselect)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 2.5 % 97.5 %\n",
+ "Exprop 0.3159812 1.374072\n"
+ ]
+ }
+ ],
+ "source": [
+ "confint(AJR.Xselect)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " [,1] [,2]\n",
+ "rD 1.27 1.73\n"
+ ]
+ }
+ ],
+ "source": [
+ "# partialling out by linear model\n",
+ "fmla.y = GDP ~ (Latitude + Latitude2 + Africa + Asia + Namer + Samer)^2\n",
+ "fmla.d = Exprop ~ (Latitude + Latitude2 + Africa + Asia + Namer + Samer)^2\n",
+ "fmla.z = logMort ~ (Latitude + Latitude2 + Africa + Asia + Namer + Samer)^2\n",
+ "rY = lm(fmla.y, data = AJR)$res\n",
+ "rD = lm(fmla.d, data = AJR)$res\n",
+ "rZ = lm(fmla.z, data = AJR)$res\n",
+ "# ivfit.lm = tsls(y=rY,d=rD, x=NULL, z=rZ, intercept=FALSE)\n",
+ "ivfit.lm = tsls(rY ~ rD | rZ, intercept = FALSE)\n",
+ "print(cbind(ivfit.lm$coef, ivfit.lm$se), digits = 3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# partialling out by lasso\n",
+ "rY = rlasso(fmla.y, data = AJR)$res\n",
+ "rD = rlasso(fmla.d, data = AJR)$res\n",
+ "rZ = rlasso(fmla.z, data = AJR)$res\n",
+ "# ivfit.lasso = tsls(y=rY,d=rD, x=NULL, z=rZ, intercept=FALSE)\n",
+ "ivfit.lasso = tsls(rY ~ rD | rZ, intercept = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1] \"Estimates and Significance Testing from from tsls\"\n",
+ " Estimate Std. Error t value p value \n",
+ "rD 0.8450 0.2699 3.131 0.00174 **\n",
+ "---\n",
+ "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "summary(ivfit.lasso)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 5.3. Application: Impact of Eminent Domain Decisions on Economic Outcomes."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "data(EminentDomain)\n",
+ "z <- as.matrix(EminentDomain$logGDP$z)\n",
+ "x <- as.matrix(EminentDomain$logGDP$x)\n",
+ "y <- EminentDomain$logGDP$y\n",
+ "d <- EminentDomain$logGDP$d\n",
+ "x <- x[, apply(x, 2, mean, na.rm = TRUE) > 0.05] #\n",
+ "z <- z[, apply(z, 2, mean, na.rm = TRUE) > 0.05] #"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "ED.ols = lm(y ~ cbind(d, x))\n",
+ "ED.2sls = tsls(y = y, d = d, x = x, z = z[, 1:2], intercept = FALSE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1] \"Estimates and significance testing of the effect of target variables in the IV regression model\"\n",
+ " coeff. se. t-value p-value\n",
+ "d1 0.4146 0.2902 1.428 0.153\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "lasso.IV.Z = rlassoIV(x = x, d = d, y = y, z = z, select.X = FALSE, select.Z = TRUE)\n",
+ "summary(lasso.IV.Z)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 2.5 % 97.5 %\n",
+ "d1 -0.1542764 0.9834796\n"
+ ]
+ }
+ ],
+ "source": [
+ "confint(lasso.IV.Z)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimates and Significance Testing of the effect of target variables in the IV regression model \n",
+ " coeff. se. t-value p-value\n",
+ "d1 -0.02383 0.12851 -0.185 0.853\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "lasso.IV.XZ = rlassoIV(x = x, d = d, y = y, z = z, select.X = TRUE, select.Z = TRUE)\n",
+ "summary(lasso.IV.XZ)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " 2.5 % 97.5 %\n",
+ "d1 -0.2757029 0.2280335\n"
+ ]
+ }
+ ],
+ "source": [
+ "confint(lasso.IV.XZ)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[1] \"Estimates and significance testing of the effect of target variables in the IV regression model\"\n",
+ " coeff. se. t-value p-value\n",
+ "d1 0.4146 0.2902 1.428 0.153\n",
+ "\n",
+ "\n",
+ "Estimates and Significance Testing of the effect of target variables in the IV regression model \n",
+ " coeff. se. t-value p-value\n",
+ "d1 -0.02383 0.12851 -0.185 0.853\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A xtable: 4 × 2\n",
+ "\n",
+ "\t | Estimate | Std. Error |
+ "\t | <dbl> | <dbl> |
+ "\n",
+ "\n",
+ "\tols regression | 0.007864732 | 0.009865927 |
+ "\tIV estimation | -0.010733269 | 0.033766362 |
+ "\tselection on Z | 0.414601641 | 0.290249208 |
+ "\tselection on X and Z | -0.023834697 | 0.128506538 |
+ "\n",
+ "
+ ],
+ "text/latex": [
+ "A xtable: 4 × 2\n",
+ "\\begin{tabular}{r|ll}\n",
+ " & Estimate & Std. Error\\\\\n",
+ " & & \\\\\n",
+ "\\hline\n",
+ "\tols regression & 0.007864732 & 0.009865927\\\\\n",
+ "\tIV estimation & -0.010733269 & 0.033766362\\\\\n",
+ "\tselection on Z & 0.414601641 & 0.290249208\\\\\n",
+ "\tselection on X and Z & -0.023834697 & 0.128506538\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A xtable: 4 × 2\n",
+ "\n",
+ "| | Estimate <dbl> | Std. Error <dbl> |\n",
+ "|---|---|---|\n",
+ "| ols regression | 0.007864732 | 0.009865927 |\n",
+ "| IV estimation | -0.010733269 | 0.033766362 |\n",
+ "| selection on Z | 0.414601641 | 0.290249208 |\n",
+ "| selection on X and Z | -0.023834697 | 0.128506538 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " Estimate Std. Error \n",
+ "ols regression 0.007864732 0.009865927\n",
+ "IV estimation -0.010733269 0.033766362\n",
+ "selection on Z 0.414601641 0.290249208\n",
+ "selection on X and Z -0.023834697 0.128506538"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "library(xtable)\n",
+ "table = matrix(0, 4, 2)\n",
+ "table[1, ] = summary(ED.ols)$coef[2, 1:2]\n",
+ "table[2, ] = cbind(ED.2sls$coef[1], ED.2sls$se[1])\n",
+ "table[3, ] = summary(lasso.IV.Z)[, 1:2]\n",
+ "table[4, ] = summary(lasso.IV.XZ)[, 1:2]\n",
+ "colnames(table) = c(\"Estimate\", \"Std. Error\")\n",
+ "rownames(table) = c(\"ols regression\", \"IV estimation \", \"selection on Z\", \"selection on X and Z\")\n",
+ "tab = xtable(table, digits = c(2, 2, 7))\n",
+ "tab"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 6. Inference on Treatment Effects in a High-Dimensional Setting"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 6.3. Application: 401(k) plan participation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "data(pension)\n",
+ "y = pension$tw\n",
+ "d = pension$p401\n",
+ "z = pension$e401\n",
+ "X = pension[, c(\"i2\", \"i3\", \"i4\", \"i5\", \"i6\", \"i7\", \"a2\", \"a3\", \"a4\", \"a5\", \"fsize\",\n",
+ "\"hs\", \"smcol\", \"col\", \"marr\", \"twoearn\", \"db\", \"pira\", \"hown\")] # simple model\n",
+ "xvar = c(\"i2\", \"i3\", \"i4\", \"i5\", \"i6\", \"i7\", \"a2\", \"a3\", \"a4\", \"a5\", \"fsize\", \"hs\",\n",
+ "\"smcol\", \"col\", \"marr\", \"twoearn\", \"db\", \"pira\", \"hown\")\n",
+ "xpart = paste(xvar, collapse = \"+\")\n",
+ "form = as.formula(paste(\"tw ~ \", paste(c(\"p401\", xvar), collapse = \"+\"), \"|\", paste(xvar,\n",
+ "collapse = \"+\")))\n",
+ "formZ = as.formula(paste(\"tw ~ \", paste(c(\"p401\", xvar), collapse = \"+\"), \"|\", paste(c(\"e401\",\n",
+ "xvar), collapse = \"+\")))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimation and significance testing of the treatment effect \n",
+ "Type: ATE \n",
+ "Bootstrap: not applicable \n",
+ " coeff. se. t-value p-value \n",
+ "TE 10180 1931 5.273 1.34e-07 ***\n",
+ "---\n",
+ "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "pension.ate = rlassoATE(form, data = pension)\n",
+ "summary(pension.ate)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Estimation and significance testing of the treatment effect \n",
+ "Type: ATET \n",
+ "Bootstrap: not applicable \n",
+ " coeff. se. t-value p-value \n",
+ "TE 12628 2944 4.289 1.8e-05 ***\n",
+ "---\n",
+ "Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# pension.atet = rlassoATET(X,d,y)\n",
+ "pension.atet = rlassoATET(form, data = pension)\n",
+ "summary(pension.atet)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "ename": "ERROR",
+ "evalue": "Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'drop': argumentos no compatibles\n",
+ "output_type": "error",
+ "traceback": [
+ "Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function 'drop': argumentos no compatibles\nTraceback:\n",
+ "1. rlassoLATE(X, d, y, z)",
+ "2. rlassoLATE.default(X, d, y, z)",
+ "3. rlassologit(d[indz0] ~ x[indz0, , drop = FALSE], post = post, \n . intercept = intercept, penalty = penalty)",
+ "4. rlassologit.formula(d[indz0] ~ x[indz0, , drop = FALSE], post = post, \n . intercept = intercept, penalty = penalty)",
+ "5. rlassologit(x, y, post = post, intercept = intercept, model = model, \n . penalty = penalty, control = control, ...)",
+ "6. rlassologit.default(x, y, post = post, intercept = intercept, \n . model = model, penalty = penalty, control = control, ...)",
+ "7. glmnet::glmnet(x, y, family = c(\"binomial\"), alpha = 1, lambda = lambda[1], \n . standardize = TRUE, intercept = intercept)",
+ "8. lognet(xd, is.sparse, ix, jx, y, weights, offset, alpha, nobs, \n . nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, \n . intr, vnames, maxit, kopt, family, pb)",
+ "9. drop(y %*% rep(1, nc))",
+ "10. .handleSimpleError(function (cond) \n . .Internal(C_tryCatchHelper(addr, 1L, cond)), \"argumentos no compatibles\", \n . base::quote(y %*% rep(1, nc)))",
+ "11. h(simpleError(msg, call))"
+ ]
+ }
+ ],
+ "source": [
+ "pension.late = rlassoLATE(X, d, y, z)\n",
+ "summary(pension.late)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "pension.latet = rlassoLATET(X, d, y, z)\n",
+ "summary(pension.latet)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "library(xtable)\n",
+ "table = matrix(0, 4, 2)\n",
+ "table[1, ] = summary(pension.ate)[, 1:2]\n",
+ "table[2, ] = summary(pension.atet)[, 1:2]\n",
+ "table[3, ] = summary(pension.late)[, 1:2]\n",
+ "table[4, ] = summary(pension.latet)[, 1:2]\n",
+ "colnames(table) = c(\"Estimate\", \"Std. Error\")\n",
+ "rownames(table) = c(\"ATE\", \"ATET \", \"LATE\", \"LATET\")\n",
+ "tab = xtable(table, digits = c(2, 2, 2))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# X = model.matrix(~ -1 + (i2 + i3 + i4 + i5 + i6 + i7 + a2 + a3 + a4 + a5 +\n",
+ "# fsize + hs + smcol + col + marr + twoearn + db + pira + hown)^2, data =\n",
+ "# pension) # model with interactions\n",
+ "xvar2 <- paste(\"(\", xvar, \")^2\", sep = \"\")\n",
+ "formExt = as.formula(paste(\"tw ~ \", paste(c(\"p401\", xvar2), collapse = \"+\"), \"|\",\n",
+ "paste(xvar2, collapse = \"+\")))\n",
+ "formZExt = as.formula(paste(\"tw ~ \", paste(c(\"p401\", xvar2), collapse = \"+\"), \"|\",\n",
+ "paste(c(\"e401\", xvar2), collapse = \"+\")))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "pension.ate = rlassoATE(X, z, y)\n",
+ "pension.atet = rlassoATET(X, z, y)\n",
+ "pension.late = rlassoLATE(X, d, y, z)\n",
+ "pension.latet = rlassoLATET(X, d, y, z)\n",
+ "# pension.ate = rlassoATE(formExt, data = pension) pension.atet =\n",
+ "# rlassoATET(formExt, data = pension) pension.late = rlassoLATE(formZExt, data =\n",
+ "# pension) pension.latet = rlassoLATET(formZExt, data = pension)\n",
+ "table = matrix(0, 4, 2)\n",
+ "table[1, ] = summary(pension.ate)[, 1:2]\n",
+ "table[2, ] = summary(pension.atet)[, 1:2]\n",
+ "table[3, ] = summary(pension.late)[, 1:2]\n",
+ "table[4, ] = summary(pension.latet)[, 1:2]\n",
+ "colnames(table) = c(\"Estimate\", \"Std. Error\")\n",
+ "rownames(table) = c(\"ATE\", \"ATET \", \"LATE\", \"LATET\")\n",
+ "tab = xtable(table, digits = c(2, 2, 2))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 7. The Lasso Methods for Discovery of Significant Causes amongst Many Potential Causes, with Many Controls"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# library(hdm) library(stats)\n",
+ "set.seed(1)\n",
+ "n = 100\n",
+ "p1 = 20\n",
+ "p2 = 20\n",
+ "D = matrix(rnorm(n * p1), n, p1) # Causes\n",
+ "W = matrix(rnorm(n * p2), n, p2) # Controls\n",
+ "X = cbind(D, W) # Regressors\n",
+ "Y = D[, 1] * 5 + W[, 1] * 5 + rnorm(n) #Outcome"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "A matrix: 20 × 2 of type dbl\n",
+ "\n",
+ "\t | 2.5 % | 97.5 % |
+ "\n",
+ "\n",
+ "\tV1 | 4.5145877 | 5.21430498 |
+ "\tV2 | -0.3142909 | 0.30494650 |
+ "\tV3 | -0.3524109 | 0.18678880 |
+ "\tV4 | -0.2542430 | 0.28738914 |
+ "\tV5 | -0.2765802 | 0.27627177 |
+ "\tV6 | -0.3214676 | 0.29422684 |
+ "\tV7 | -0.2262507 | 0.30094168 |
+ "\tV8 | -0.0473541 | 0.47366372 |
+ "\tV9 | -0.1865636 | 0.39023520 |
+ "\tV10 | -0.2372356 | 0.26411185 |
+ "\tV11 | -0.3147091 | 0.20945872 |
+ "\tV12 | -0.3091905 | 0.26572176 |
+ "\tV13 | -0.1741550 | 0.37682465 |
+ "\tV14 | -0.3235734 | 0.38543162 |
+ "\tV15 | -0.3219763 | 0.31312486 |
+ "\tV16 | -0.2649505 | 0.33100700 |
+ "\tV17 | -0.1792080 | 0.41696169 |
+ "\tV18 | -0.3693247 | 0.04695928 |
+ "\tV19 | -0.1073109 | 0.39368776 |
+ "\tV20 | -0.2157182 | 0.25543839 |
+ "\n",
+ "
+ ],
+ "text/latex": [
+ "A matrix: 20 × 2 of type dbl\n",
+ "\\begin{tabular}{r|ll}\n",
+ " & 2.5 \\% & 97.5 \\%\\\\\n",
+ "\\hline\n",
+ "\tV1 & 4.5145877 & 5.21430498\\\\\n",
+ "\tV2 & -0.3142909 & 0.30494650\\\\\n",
+ "\tV3 & -0.3524109 & 0.18678880\\\\\n",
+ "\tV4 & -0.2542430 & 0.28738914\\\\\n",
+ "\tV5 & -0.2765802 & 0.27627177\\\\\n",
+ "\tV6 & -0.3214676 & 0.29422684\\\\\n",
+ "\tV7 & -0.2262507 & 0.30094168\\\\\n",
+ "\tV8 & -0.0473541 & 0.47366372\\\\\n",
+ "\tV9 & -0.1865636 & 0.39023520\\\\\n",
+ "\tV10 & -0.2372356 & 0.26411185\\\\\n",
+ "\tV11 & -0.3147091 & 0.20945872\\\\\n",
+ "\tV12 & -0.3091905 & 0.26572176\\\\\n",
+ "\tV13 & -0.1741550 & 0.37682465\\\\\n",
+ "\tV14 & -0.3235734 & 0.38543162\\\\\n",
+ "\tV15 & -0.3219763 & 0.31312486\\\\\n",
+ "\tV16 & -0.2649505 & 0.33100700\\\\\n",
+ "\tV17 & -0.1792080 & 0.41696169\\\\\n",
+ "\tV18 & -0.3693247 & 0.04695928\\\\\n",
+ "\tV19 & -0.1073109 & 0.39368776\\\\\n",
+ "\tV20 & -0.2157182 & 0.25543839\\\\\n",
+ "\\end{tabular}\n"
+ ],
+ "text/markdown": [
+ "\n",
+ "A matrix: 20 × 2 of type dbl\n",
+ "\n",
+ "| | 2.5 % | 97.5 % |\n",
+ "|---|---|---|\n",
+ "| V1 | 4.5145877 | 5.21430498 |\n",
+ "| V2 | -0.3142909 | 0.30494650 |\n",
+ "| V3 | -0.3524109 | 0.18678880 |\n",
+ "| V4 | -0.2542430 | 0.28738914 |\n",
+ "| V5 | -0.2765802 | 0.27627177 |\n",
+ "| V6 | -0.3214676 | 0.29422684 |\n",
+ "| V7 | -0.2262507 | 0.30094168 |\n",
+ "| V8 | -0.0473541 | 0.47366372 |\n",
+ "| V9 | -0.1865636 | 0.39023520 |\n",
+ "| V10 | -0.2372356 | 0.26411185 |\n",
+ "| V11 | -0.3147091 | 0.20945872 |\n",
+ "| V12 | -0.3091905 | 0.26572176 |\n",
+ "| V13 | -0.1741550 | 0.37682465 |\n",
+ "| V14 | -0.3235734 | 0.38543162 |\n",
+ "| V15 | -0.3219763 | 0.31312486 |\n",
+ "| V16 | -0.2649505 | 0.33100700 |\n",
+ "| V17 | -0.1792080 | 0.41696169 |\n",
+ "| V18 | -0.3693247 | 0.04695928 |\n",
+ "| V19 | -0.1073109 | 0.39368776 |\n",
+ "| V20 | -0.2157182 | 0.25543839 |\n",
+ "\n"
+ ],
+ "text/plain": [
+ " 2.5 % 97.5 % \n",
+ "V1 4.5145877 5.21430498\n",
+ "V2 -0.3142909 0.30494650\n",
+ "V3 -0.3524109 0.18678880\n",
+ "V4 -0.2542430 0.28738914\n",
+ "V5 -0.2765802 0.27627177\n",
+ "V6 -0.3214676 0.29422684\n",
+ "V7 -0.2262507 0.30094168\n",
+ "V8 -0.0473541 0.47366372\n",
+ "V9 -0.1865636 0.39023520\n",
+ "V10 -0.2372356 0.26411185\n",
+ "V11 -0.3147091 0.20945872\n",
+ "V12 -0.3091905 0.26572176\n",
+ "V13 -0.1741550 0.37682465\n",
+ "V14 -0.3235734 0.38543162\n",
+ "V15 -0.3219763 0.31312486\n",
+ "V16 -0.2649505 0.33100700\n",
+ "V17 -0.1792080 0.41696169\n",
+ "V18 -0.3693247 0.04695928\n",
+ "V19 -0.1073109 0.39368776\n",
+ "V20 -0.2157182 0.25543839"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "confint(rlassoEffects(X, Y, index = c(1:p1)), joint = TRUE)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "R",
+ "language": "R",
+ "name": "ir"
+ },
+ "language_info": {
+ "codemirror_mode": "r",
+ "file_extension": ".r",
+ "mimetype": "text/x-r-source",
+ "name": "R",
+ "pygments_lexer": "r",
+ "version": "4.2.0"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2