---
title: "3607HW1"
output: html_document
---
<style type="text/css">
body{ /* Normal */
font-size: 12px;
}
td { /* Table */
font-size: 8px;
}
h1.title {
font-size: 20px;
}
h1 { /* Header 1 */
font-size: 18px;
}
h2 { /* Header 2 */
font-size: 16px;
color: DarkBlue;
}
h3 { /* Header 3 */
font-size: 14px;
color: DarkBlue;
}
code.r{ /* Code block */
font-size: 11px;
}
pre { /* Code block - determines code spacing between lines */
font-size: 12px;
}
</style>
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,fig.width=6, fig.height=4)
```
## Question 1
### (c) Observed data
```{r y_obs,fig.width = 4, fig.height = 3}
y_obs <- read.csv("q1.csv", header = TRUE)
summary(y_obs)
n      <- length(y_obs$y)
y_mean <- mean(y_obs$y)
y_sd   <- sd(y_obs$y)
var    <- y_sd^2 * (n - 1) / n            # MLE (biased) variance estimate
i      <- seq(-4, 4, length = 100)
true_norm <- dnorm(i, y_mean, sqrt(var))  # normal density with the sample statistics of y
par(mfrow = c(1, 1))
hist(y_obs$y, freq = FALSE, xlim = c(-3, 3), main = "Histogram of y")
lines(i, true_norm, lty = 2, col = "red", lwd = 2)
```
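As an optional extra check (an addition to the required output), a normal Q-Q plot shows the tails more clearly than the histogram overlay:
```{r y_obs_qq,fig.width = 4, fig.height = 3}
# Points near the line are consistent with approximate normality of y
qqnorm(y_obs$y)
qqline(y_obs$y, col = "red", lwd = 2)
```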
### (d) Posterior distribution
```{r sample1,fig.width = 6, fig.height = 4}
set.seed(100)
mu0  <- 10    # prior mean
var0 <- 100   # prior variance
n    <- 100
y_bar <- mean(y_obs$y)
var_n <- var0 / n                                       # variance of the sample mean (data variance var0)
mu1   <- (var0 * y_bar + mu0 * var_n) / (var0 + var_n)  # posterior mean
var1  <- 1 / (1 / var0 + 1 / var_n)                     # posterior variance
N <- 10000
sample1 <- rnorm(N, mean = mu1, sd = sqrt(var1))        # draw directly from the posterior
par(mfrow = c(1, 2))
j <- 1:N
plot(j, sample1[j], type = "l", main = expression(paste("Trace plot of ", mu)), ylab = expression(mu))
plot(density(sample1), type = "l", main = expression(paste("Empirical density of ", mu)))
```
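A quick numerical check (added here): the Monte Carlo mean and standard deviation of the draws should be close to the analytic posterior values computed above.
```{r check_sample1}
c(mc_mean = mean(sample1), analytic_mean = mu1)
c(mc_sd = sd(sample1), analytic_sd = sqrt(var1))
```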
### (e) Posterior predictive distribution
```{r sample2,fig.width = 6, fig.height = 4}
mu2  <- mu1          # predictive mean
var2 <- var0 + var1  # predictive variance = data variance + posterior variance
sample2 <- rnorm(N, mean = mu2, sd = sqrt(var2))
par(mfrow = c(1, 2))
j <- 1:N
plot(j, sample2[j], type = "l", main = expression(paste("Trace plot of ", tilde(y))), ylab = expression(tilde(y)))
plot(density(sample2), type = "l", main = expression(paste("Empirical density of ", tilde(y))))
```
## Question 2
### (a)
Derive the posterior distribution from the sample size $n$ and the sample mean $\overline{y}$ (a sufficient statistic).
Since $\mu \sim N(0,\sigma^2_{0})$,
$$f(\mu)=\frac{1}{\sqrt{2\pi\sigma^2_{0}}} e^{-\frac{\mu^{2}}{2\sigma^2_{0}}}$$
and, because $\overline{y}\mid\mu \sim N(\mu, 1/n)$,
$$f(\overline{y}|\mu)=\frac{1}{\sqrt{2\pi/n}} e^{-\frac{n(\overline{y}-\mu)^{2}}{2}}.$$
Hence
$$f(\mu|\overline{y})\propto f(\mu)f(\overline{y}|\mu) \propto \exp\left[-\frac{\mu^{2}}{2\sigma^2_{0}}-\frac{(\overline{y}-\mu)^{2}}{2/n}\right]$$
$$\propto \exp\left(\frac{-\mu^{2}(1/n+\sigma^2_{0})+2\overline{y}\mu\sigma^2_{0}-\overline{y}^2\sigma^2_{0}}{2\sigma^2_{0}/n}\right)$$
$$\propto \exp\left(\frac{-\mu^{2}+2\overline{y}\mu\sigma^2_{0}/(1/n+\sigma^2_{0})-\overline{y}^2\left[\sigma^2_{0}/(1/n+\sigma^2_{0})\right]^2}{(2\sigma^2_{0}/n)/(1/n+\sigma^2_{0})}\right) \quad \text{(completing the square and dropping terms free of }\mu\text{)}$$
$$\propto \exp\left(\frac{-\left[\mu-\overline{y}\sigma^2_{0}/(1/n+\sigma^2_{0})\right]^2}{(2\sigma^2_{0}/n)/(1/n+\sigma^2_{0})}\right).$$
Let $\sigma^2_{1}=(\sigma^2_{0}/n)/(1/n+\sigma^2_{0})$ and
$\mu_1=\overline{y}\sigma^2_{0}/(1/n+\sigma^2_{0})=n\overline{y}\sigma^2_{1}$, so that
$$f(\mu|\overline{y})\propto \exp\left[\frac{-(\mu-\mu_1)^2}{2\sigma^2_{1}}\right].$$
Since the density must integrate to 1,
$$f(\mu|\overline{y})=\frac{1}{\sqrt{2\pi\sigma^2_{1}}} \exp\left[\frac{-(\mu-\mu_1)^2}{2\sigma^2_{1}}\right].$$
Thus $\mu|\overline{y} \sim N(\mu_1,\sigma^2_{1})$.
Now compute $\sigma^2_{1}$ and $\mu_1$ from the data (here $\sigma^2_{0}=100$ and $n=500$).
```{r y_sample}
y_sample <- read.csv("q2.csv", header = TRUE)
summary(y_sample)
n <- 500
y_bar <- mean(y_sample$y)
(var_1 <- 10^2 / (1 + 10^2 * n))   # sigma_1^2 with sigma_0^2 = 100
(mu_1  <- n * y_bar * var_1)       # mu_1 = n * y_bar * sigma_1^2
```
To find $P(H_0|y)$, compute the Z statistic
$$ Z=\frac{0-\mu_1}{\sigma_1}\sim N(0,1),$$
so that
$$P(H_0|y) = \int_{-\infty}^{0}f(\mu|y)\, d\mu = \Phi(Z),$$
where $\Phi$ is the cdf of $N(0,1)$.
```{r Z}
(Z <- (-mu_1)/sqrt(var_1))
(p <- pnorm(Z))
```
$P(H_0|y) = 6.652995\times10^{-5}$.
Bayes factor (with $\pi(\mu)$ the $N(0,\sigma^2_{0})$ prior density):
$$BF=\frac{\int_{0}^{\infty}f(y|\mu)\,\pi(\mu)\, d\mu}{\int_{-\infty}^{0}f(y|\mu)\,\pi(\mu)\, d\mu}$$
```{r BF}
## P(y|M1): mu > 0
P_M1 <- integrate(function(mu) {
  dnorm(y_bar, mean = mu, sd = 1 / sqrt(n)) * dnorm(mu, mean = 0, sd = 10)  # prior sd = sigma_0 = 10
}, lower = 0, upper = Inf)
## P(y|M0): mu <= 0
P_M0 <- integrate(function(mu) {
  dnorm(y_bar, mean = mu, sd = 1 / sqrt(n)) * dnorm(mu, mean = 0, sd = 10)
}, lower = -Inf, upper = 0)
(BF <- P_M1$value / P_M0$value)
log10(BF)
```
Bayes factor $\approx 1.5\times10^{4}$, $\log_{10}(BF) \approx 4.2 > 2$.
Thus the evidence is decisive: reject $H_0$ in favour of $H_1$: $\mu>0$.
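As a sanity check (added): because the $N(0,\sigma^2_{0})$ prior is symmetric about zero, putting mass $1/2$ on each hypothesis, the Bayes factor equals the posterior odds $P(H_1|y)/P(H_0|y)$ from part (a):
```{r BF_check}
# Posterior odds; should match BF up to numerical error
(1 - p) / p
```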
### (b)
This is a frequentist problem. Since the variance is known, use the Z statistic
$$ Z=\frac{\overline{y}-\mu}{\sigma/\sqrt{n}}\sim N(0,1).$$
In this case, under $H_0$: $\sigma=1$, $\mu=0$, $n=500$, and
$$p\text{-value} = \Pr(Z> z_{obs}\mid H_0)= 1-\Phi(z_{obs})= \int_{\overline{y}}^{\infty}\frac{1}{\sqrt{2\pi/n}}\, e^{-\frac{nx^{2}}{2}}\, dx,$$
where $z_{obs}$ is the observed value of the statistic.
Conduct a one-tailed (right-tail) hypothesis test:
$H_0$: $\mu \le 0$, $H_1$: $\mu > 0$.
```{r z.test.right}
z.test.right <- function(data, mu0, var, alpha)
{
  z.stat <- (mean(data) - mu0) / sqrt(var / length(data))
  z.critical <- qnorm(1 - alpha)   # e.g. alpha = 0.05 -> 1.64
  p.value <- 1 - pnorm(z.stat)
  if (z.stat >= z.critical) {
    print("Reject H0")
  } else {
    print("Fail to reject H0")
  }
  print('Z statistic')
  print(z.stat)
  print('Z critical value')
  print(z.critical)
  print('P value')
  print(p.value)
  return(z.stat)
}
z.test.right(y_sample$y, 0, 1, 0.05)
```
The p-value $= 6.65\times10^{-5} < 0.05$, so $H_0$ is rejected at the 5% significance level.
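It is worth noting that with this diffuse prior the one-sided p-value and the posterior probability $P(H_0|y)$ from part (a) nearly coincide, and they agree exactly in the limit $\sigma^2_{0}\to\infty$:
```{r p_compare}
# Frequentist one-sided p-value vs. Bayesian P(H0 | y)
c(p_value = 1 - pnorm(sqrt(n) * y_bar), posterior_P_H0 = p)
```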
### (c)
(please see the handwritten derivation)
### (d)
```{r sigma_vector}
sigma_vector <- c(1, 2, 5, 10, 20, 50, 100)
p_vector <- rep(NA, 7)
n <- 500
for (i in 1:7) {
  sigma <- sigma_vector[i]
  var_1 <- sigma^2 / (1 + sigma^2 * n)   # posterior variance for prior sd sigma
  mu_1  <- n * y_bar * var_1             # posterior mean
  Z <- (-mu_1) / sqrt(var_1)
  p_vector[i] <- pnorm(Z)                # P(H0 | y)
}
log_sigma <- log(sigma_vector^2)
par(mfrow = c(1, 1))
plot(p_vector ~ log_sigma,
     main = expression(paste("P(", H[0], "|y) versus log(", sigma^2, ")")),
     xlab = expression(log(sigma^2)), ylab = expression(paste("P(", H[0], "|y)")))
```
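For reference, the computed values behind the plot:
```{r p_table}
# P(H0 | y) for each prior standard deviation sigma_0
round(cbind(sigma_0 = sigma_vector, P_H0_given_y = p_vector), 6)
```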
## Question 3
### (a)
For the Laplace distribution, first find the cdf:
$$F(x)=\begin{cases}\dfrac{e^{x}}{2}, & x<0\\[4pt] 1-\dfrac{e^{-x}}{2}, & x\ge 0\end{cases}$$
Inverse cdf:
$$F^{-1}(u)=\begin{cases}\ln(2u), & 0<u\le\tfrac{1}{2}\\[4pt] -\ln(2-2u), & \tfrac{1}{2}<u\le 1\end{cases}$$
Use inverse-cdf transformation sampling:
```{r W,message = FALSE,fig.width = 4, fig.height = 3}
require(rmutil)   # for dlaplace()
set.seed(99)
N <- 10000
U <- runif(N)
# Inverse-cdf transform: branch on U <= 1/2
W <- log(2 * U) * (U <= 1/2) + (-log(2 * (1 - U))) * (U > 1/2)
j <- seq(-4, 4, length = 100)
true_laplace <- dlaplace(j, 0, 1)
par(mfrow = c(1, 1))
hist(W, freq = FALSE, breaks = seq(from = -9, to = 13, by = 0.5), ylim = c(0, 0.5))
lines(j, true_laplace, lty = 2, col = "blue", lwd = 2)
```
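As an optional goodness-of-fit check (added; `plaplace` is rmutil's Laplace cdf with the same defaults used above), a Kolmogorov-Smirnov test compares the draws with the target distribution:
```{r W_ks}
# A large p-value indicates no evidence against the standard Laplace
ks.test(W, plaplace)
```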
### (b)
To apply rejection sampling we need
$$ f(x) \le cg(x).$$
For the normal distribution N(0,1), the pdf is
$$ f(x)=\frac{1}{\sqrt{2\pi}} e^{-\frac{x^{2}}{2}}.$$
From part (a) we have
$$g(x)=\frac{e^{-|x|}}{2}.$$
To find c, define $h(x) = f(x)/g(x)$. Since both $f(x)$ and $g(x)$ are symmetric around $x=0$, we only need to solve $h'(x)=0$ for $x\ge0$:
$$h(x)=\frac{f(x)}{g(x)}=\frac{2}{\sqrt{2\pi}} e^{x-\frac{x^{2}}{2}},\quad x\ge0.$$
This is maximised when $x-\frac{x^{2}}{2}$ is maximised, namely at $x=1$.
Thus $c=\sqrt{\frac{2e}{\pi}}\approx1.32$ and, for $y\ge0$,
$$\frac{f(y)}{cg(y)} = e^{-\frac{(y-1)^2}{2}}.$$
Step 1: Draw $U$ from Unif(0, 1).
Step 2: Generate $Y \sim$ Exp(1) (the proposal for $|X|$) independently, i.e. draw $U_0$ from Unif(0, 1) and set $Y = -\ln(U_0)$.
Step 3: If $U \le f(Y)/cg(Y)$, set $|X| = Y$; otherwise, go to Step 1.
Step 4: Generate $U_1$ from Unif(0, 1). Set $X = -|X|$ if $U_1 \le 0.5$ and $X = |X|$ if $U_1 > 0.5$.
```{r X,fig.width = 4, fig.height = 3}
rejection <- function() {
  repeat {
    U0 <- runif(1)
    Y  <- -log(U0)                      # Y ~ Exp(1): proposal for |X|
    U  <- runif(1)
    accept_prob <- exp(-(Y - 1)^2 / 2)  # f(Y) / (c g(Y))
    if (U <= accept_prob) break         # accept Y; otherwise propose again
  }
  U1 <- runif(1)                        # attach a random sign by symmetry
  if (U1 <= 0.5) -Y else Y
}
X <- rep(NA, N)
for (i in 1:N) { X[i] <- rejection() }
j <- seq(-4, 4, length = 100)
true_norm <- dnorm(j, 0, 1)
par(mfrow = c(1, 1))
hist(X, freq = FALSE, ylim = c(0, 0.5))
lines(j, true_norm, lty = 2, col = "blue", lwd = 2)
```
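The theoretical acceptance probability of the sampler is $1/c=\sqrt{\pi/(2e)}\approx0.76$; a quick Monte Carlo check (added, using fresh proposal draws only):
```{r accept_rate}
Y_prop <- -log(runif(10000))       # proposal draws, Exp(1)
mean(exp(-(Y_prop - 1)^2 / 2))     # empirical acceptance probability
sqrt(pi / (2 * exp(1)))            # theoretical 1/c
```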
## Question 4
The posterior distributions are derived on the handwritten pages.
```{r message = FALSE}
reg <- read.delim("hw1reg.txt", header = TRUE,sep = " ",colClasses = c("numeric", "numeric", "numeric"))
summary(reg)
y<-reg$y
x1<-reg$x1
x2<-reg$x2
linreg = lm(y ~ x1 + x2)
summary(linreg)
```
Define the Gibbs sampling functions. Since R's built-in functions do not support the inverse-gamma distribution, work with the precision $\tau=1/\sigma^2$,
so that $\tau \sim Gamma\left(a+\frac{n}{2},\; b+\frac{\Sigma(y-\beta_0 - \beta_1 x_1 -\beta_2 x_2)^2}{2}\right)$, where the second parameter is a rate.
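For reference (this restates what `sample_beta_0` below implements; the full derivations are on the handwritten pages), the full conditional of the intercept under a $N(\mu_0, 1/\tau_0)$ prior is
$$\beta_0 \mid \beta_1,\beta_2,\tau,y \;\sim\; N\!\left(\frac{\tau_0\mu_0+\tau\sum_i\left(y_i-\beta_1 x_{1i}-\beta_2 x_{2i}\right)}{\tau_0+n\tau},\ \frac{1}{\tau_0+n\tau}\right),$$
and the conditionals for $\beta_1$ and $\beta_2$ have the same form, with each residual weighted by the corresponding covariate.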
```{r }
sample_beta_0 <- function(y, x1, x2, beta_1, beta_2, tau, mu_0, tau_0) {
  N <- length(y)
  precision <- tau_0 + tau * N
  mean <- (tau_0 * mu_0 + tau * sum(y - beta_1 * x1 - beta_2 * x2)) / precision
  rnorm(1, mean, 1 / sqrt(precision))
}
sample_beta_1 <- function(y, x1, x2, beta_0, beta_2, tau, mu_1, tau_1) {
  N <- length(y)
  precision <- tau_1 + tau * sum(x1^2)
  mean <- (tau_1 * mu_1 + tau * sum((y - beta_0 - beta_2 * x2) * x1)) / precision
  rnorm(1, mean, 1 / sqrt(precision))
}
sample_beta_2 <- function(y, x1, x2, beta_0, beta_1, tau, mu_2, tau_2) {
  N <- length(y)
  precision <- tau_2 + tau * sum(x2^2)
  mean <- (tau_2 * mu_2 + tau * sum((y - beta_0 - beta_1 * x1) * x2)) / precision
  rnorm(1, mean, 1 / sqrt(precision))
}
sample_tau <- function(y, x1, x2, beta_0, beta_1, beta_2, alpha, beta) {
  N <- length(y)
  alpha_new <- alpha + N / 2
  resid <- y - beta_0 - beta_1 * x1 - beta_2 * x2
  beta_new <- beta + sum(resid * resid) / 2
  rgamma(1, alpha_new, rate = beta_new)   # Gamma(shape, rate) matches the posterior above
}
```
Initialization with noninformative priors.
```{r init}
init <- c(beta_0 = 0, beta_1 = 0, beta_2 = 0, tau = 1/1000)   # tau = 1/sigma^2
hypers <- c(mu_0 = 0, tau_0 = 1/1000, mu_1 = 0, tau_1 = 1/1000,
            mu_2 = 0, tau_2 = 1/1000, a = 0.0001, b = 1)
iters <- 100000
```
Run the Gibbs sampler and summarize the results.
```{r}
set.seed(101)
gibbs <- function(y, x1, x2, iters, init, hypers) {
  beta_0 <- init["beta_0"]
  beta_1 <- init["beta_1"]
  beta_2 <- init["beta_2"]
  tau    <- init["tau"]
  ## trace stores the values of the betas and tau; preallocating is much faster than rbind in the loop
  trace <- matrix(0, nrow = iters, ncol = 4)
  colnames(trace) <- c("beta_0", "beta_1", "beta_2", "tau")
  for (i in 1:iters) {
    beta_0 <- sample_beta_0(y, x1, x2, beta_1, beta_2, tau, hypers["mu_0"], hypers["tau_0"])
    beta_1 <- sample_beta_1(y, x1, x2, beta_0, beta_2, tau, hypers["mu_1"], hypers["tau_1"])
    beta_2 <- sample_beta_2(y, x1, x2, beta_0, beta_1, tau, hypers["mu_2"], hypers["tau_2"])
    tau    <- sample_tau(y, x1, x2, beta_0, beta_1, beta_2, hypers["a"], hypers["b"])
    trace[i, ] <- c(beta_0, beta_1, beta_2, tau)
  }
  trace
}
trace <- gibbs(y, x1, x2, iters, init, hypers)
gibbs_results <- cbind(trace[, 1:3], 1 / trace[, 4])   # convert tau back to sigma^2
colnames(gibbs_results) <- c("beta_0", "beta_1", "beta_2", "sigma_squared")
```
Because the early iterations of the Gibbs sampler are still burning in, the trace plots start from the 100th iteration.
```{r,fig.width = 8, fig.height = 4}
j <- 100:iters
par(mfrow = c(1, 2))
plot(j, gibbs_results[j, 1], type = "l", main = expression(paste("Trace plot of intercept ", beta[0])), ylab = expression(beta[0]))
plot(j, gibbs_results[j, 2], type = "l", main = expression(paste("Trace plot of ", beta[1])), ylab = expression(beta[1]))
plot(j, gibbs_results[j, 3], type = "l", main = expression(paste("Trace plot of ", beta[2])), ylab = expression(beta[2]))
plot(j, gibbs_results[j, 4], type = "l", main = expression(paste("Trace plot of ", sigma^2)), ylab = expression(sigma^2))
posterior_mean <- apply(gibbs_results, 2, mean)
posterior_sd   <- apply(gibbs_results, 2, sd)
print(cbind(posterior_mean, posterior_sd))
```
Compare with the frequentist linear regression results:
```{r linreg}
summary(linreg)$coefficients[ , 1:2]
```
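As a small addition, 95% equal-tailed credible intervals from the same draws make the comparison more concrete:
```{r cred_int}
# 2.5% and 97.5% posterior quantiles for each parameter
t(apply(gibbs_results, 2, quantile, probs = c(0.025, 0.975)))
```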