-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSimulate.R
138 lines (105 loc) · 4.26 KB
/
Simulate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
Simulate <- function(n) {
  # Simulate joint longitudinal/survival data: each subject has up to K
  # repeated measurements of a Gaussian outcome whose current linear
  # predictor feeds a Weibull hazard through association parameter alpha.
  #
  # Args:
  #   n: number of subjects; must be even (subjects are split half/half
  #      between the two treatment groups).
  #
  # Returns:
  #   data.frame with one row per retained longitudinal measurement and
  #   columns: time, group, id, y, Time, event, y2, CC. 'y2'/'CC' imitate
  #   a case-cohort design: 'y' is masked (NA) outside the case-cohort set.
  K <- 25          # planned repeated measurements per subject
  t.max <- 15      # maximum follow-up time
  ################################################
  # parameters for the linear mixed effects model, in CELL-MEANS coding:
  # separate intercepts for group 0 / group 1, shared linear + quadratic
  # time trend
  betas <- c("Group0" = 1, "Group1" = 0.1, "Time1" = 0.3,
             "Time1:Time1" = 0.1)
  sigma.y <- 1     # measurement error standard deviation
  # parameters for the survival model
  gammas <- c("(Intercept)" = -7.5, "Group" = -2)  # baseline covariates
  alpha <- 1       # association parameter
  phi <- 2         # shape of the Weibull baseline hazard
  mean.Cens <- 3.2 # mean of the (uniform) censoring-time distribution
  # diagonal covariance matrix of the random effects (intercept, slope,
  # quadratic term)
  D <- diag(c(0.99, 0.99, 0.07)^2)
  ################################################
  # measurement times: first visit at 0, remaining K-1 uniform on (0, t.max)
  times <- c(replicate(n, c(0, sort(runif(K - 1, 0, t.max)))))
  group <- rep(0:1, each = n / 2)  # '0' placebo, '1' active treatment
  DF <- data.frame(obstime = times, gender = factor(rep(group, each = K)))
  # FIX: cell-means coding (~ 0 + gender) so that X agrees with the names
  # of 'betas' AND with the design built inside invS(); the original
  # intercept coding made the hazard see a trajectory for group 1 whose
  # intercept differed by 1 from the one generating y.
  X <- model.matrix(~ 0 + gender + obstime + I(obstime * obstime), data = DF)
  Z <- model.matrix(~ obstime + I(obstime * obstime), data = DF)
  # design matrix for the survival model
  W <- cbind("(Intercept)" = 1, "Group" = group)
  ################################################
  # simulate random effects; D is diagonal, so independent normals are
  # exactly equivalent to MASS::mvrnorm(n, 0, D) and avoid the undeclared
  # MASS dependency of the original
  b <- vapply(sqrt(diag(D)), function(s) rnorm(n, 0, s), numeric(n))
  # simulate longitudinal responses
  id <- rep(seq_len(n), each = K)
  eta.y <- as.vector(X %*% betas + rowSums(Z * b[id, ]))  # linear predictor
  y <- rnorm(n * K, eta.y, sigma.y)
  # simulate true event times by inversion: S(t) = exp(-cumulative hazard);
  # invS(t, u, i) = 0 solves S(t) = u for subject i with u ~ Unif(0, 1)
  eta.t <- as.vector(W %*% gammas)
  invS <- function(t, u, i) {
    h <- function(s) {
      group0 <- 1 - group[i]
      group1 <- group[i]
      XX <- cbind(group0, group1, s, s^2)  # same cell-means design as X
      ZZ <- cbind(1, s, s^2)
      f1 <- as.vector(XX %*% betas + rowSums(ZZ * b[rep(i, nrow(ZZ)), ]))
      # Weibull baseline hazard times exp(covariates + association term)
      exp(log(phi) + (phi - 1) * log(s) + eta.t[i] + f1 * alpha)
    }
    integrate(h, lower = 0, upper = t)$value + log(u)
  }
  u <- runif(n)
  trueTimes <- numeric(n)
  for (i in seq_len(n)) {
    Up <- 50
    tries <- 20
    Root <- try(uniroot(invS, interval = c(1e-05, Up), u = u[i], i = i)$root,
                silent = TRUE)
    # widen the search interval until a sign change is bracketed (or give up)
    while (inherits(Root, "try-error") && tries > 0) {
      tries <- tries - 1
      Up <- Up + 200
      Root <- try(uniroot(invS, interval = c(1e-05, Up), u = u[i], i = i)$root,
                  silent = TRUE)
    }
    trueTimes[i] <- if (!inherits(Root, "try-error")) Root else NA
  }
  # drop subjects whose event time could not be bracketed
  keep <- !is.na(trueTimes)
  trueTimes <- trueTimes[keep]
  W <- W[keep, , drop = FALSE]
  long.keep <- rep(keep, each = K)
  y <- y[long.keep]
  X <- X[long.keep, , drop = FALSE]
  Z <- Z[long.keep, , drop = FALSE]
  DF <- DF[long.keep, ]
  n <- length(trueTimes)
  # censoring times are Uniform(0, 2 * mean.Cens), i.e. mean mean.Cens
  # (the original comment wrongly said "exponential distribution");
  # observed time is min(true event time, censoring time)
  Ctimes <- runif(n, 0, 2 * mean.Cens)
  Time <- pmin(trueTimes, Ctimes)
  event <- as.numeric(trueTimes <= Ctimes)  # 1 = event observed
  ################################################
  # keep only the longitudinal measurements taken at or before each
  # subject's observed event/censoring time
  ind <- times[long.keep] <= rep(Time, each = K)
  y <- y[ind]
  X <- X[ind, , drop = FALSE]
  Z <- Z[ind, , drop = FALSE]
  id <- id[long.keep][ind]
  id <- match(id, unique(id))  # renumber ids consecutively from 1
  dat <- DF[ind, ]
  dat$id <- id
  dat$y <- y
  dat$Time <- Time[id]
  dat$event <- event[id]
  names(dat) <- c("time", "group", "id", "y", "Time", "event")
  # imitate a case-cohort design: a ~1/3 random subcohort plus every
  # subject with an observed event keeps the outcome; everyone else gets NA
  sub.ids <- sample(unique(dat$id), replace = FALSE,
                    size = round(0.3333 * length(unique(dat$id))))
  event.ids <- dat$id[dat$event == 1]
  cc.ids <- unique(c(sub.ids, event.ids))
  dat$y2 <- ifelse(dat$id %in% cc.ids, dat$y, NA)
  dat$CC <- ifelse(dat$id %in% cc.ids, 1, 0)
  # no rm() needed: the function's local environment is discarded on return
  # (the original's unused Bkn/kn/id.id/dat.id objects were dropped as
  # dead code)
  dat
}