Skip to content

Commit

Permalink
`cate` function now has a 'rep' argument for replicating the cross-fitting
Browse files Browse the repository at this point in the history
  • Loading branch information
kkholst committed Oct 1, 2024
1 parent 590a7d3 commit a6e78f6
Show file tree
Hide file tree
Showing 11 changed files with 331 additions and 174 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -56,5 +56,5 @@ ByteCompile: yes
RcppModules: riskregmodel
Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
VignetteBuilder: knitr
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# targeted 0.6
- Repeated cross-fitting in the `cate` function via the new `rep` argument

# targeted 0.5
- `cate` now also returns the expected potential outcomes and influence functions
- Bug-fix in the `ml_model$update()` method
Expand Down
19 changes: 13 additions & 6 deletions R/aipw.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,44 @@
##' @export
##' @param response_model Model for the response given covariates (ml_model or
##' formula)
##' @param missing_model Missing data mechanism model (propensity model)
##' (ml_model or formula)
##' @param data data.frame
##' @param ... additional arguments (see [cate()])
##' @param formula design specifying the OLS estimator with outcome given by the
##' EIF
##' @param missing_model Optional missing_model (ml_model or formula). By
##' default will use the same design as the response_model.
##' @param ... arguments to cate
##' @examples
##' m <- lvm(y ~ x+z, r ~ x)
##' distribution(m,~ r) <- binomial.lvm()
##' transform(m, y0~r+y) <- function(x) { x[x[,1]==0,2] <- NA; x[,2] }
##' d <- sim(m,1e3,seed=1)
##'
##' aipw(y0 ~ x, data=d)
aipw <- function(response_model, data,
formula = ~1,
aipw <- function(response_model,
missing_model,
formula = ~1,
data,
...) {
if (inherits(response_model, "formula")) {
response_model <- ML(response_model)
}
resp <- lava::getoutcome(response_model$formula)
r <- !is.na(model.frame(as.formula(paste0(resp, "~1")), data = data, na.action = na.pass)) * 1
r <- !is.na(model.frame(
as.formula(paste0(resp, "~1")),
data = data, na.action = na.pass
)) * 1
data[, "R_"] <- r[, 1]
if (base::missing(missing_model)) {
missing_model <- update(response_model$formula, as.formula("R_ ~ ."))
}
if (inherits(missing_model, "formula")) {
missing_model <- ML(missing_model, family=binomial)
}
formula <- update(formula, as.formula("R_ ~ ."))
cate(formula, response_model, missing_model,
cate(response_model=response_model,
propensity_model=missing_model,
cate_model = formula,
data = data, contrast = 1, stratify = TRUE,
...
)
Expand Down
111 changes: 74 additions & 37 deletions R/ate.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,27 @@
##' @param formula Formula (see details below)
##' @param data data.frame
##' @param weights optional frequency weights
##' @param offset optional offset (character or vector). can also be specified in the formula.
##' @param offset optional offset (character or vector). Can also be specified
##' in the formula.
##' @param family Exponential family argument for outcome model
##' @param nuisance outcome regression formula (Q-model)
##' @param propensity propensity model formula
##' @param all If TRUE all standard errors are calculated (default TRUE when exposure
##' only has two levels)
##' @param all If TRUE all standard errors are calculated (default TRUE when
##' exposure only has two levels)
##' @param labels Optional treatment labels
##' @param ... Additional arguments to lower level functions
##' @return An object of class '\code{ate.targeted}' is returned. See \code{\link{targeted-class}}
##' for more details about this class and its generic functions.
##' @details
##' The formula may either be specified as:
##' response ~ treatment | nuisance-formula | propensity-formula
##' @return An object of class '\code{ate.targeted}' is returned. See
##' \code{\link{targeted-class}} for more details about this class and its
##' generic functions.
##' @details The formula may either be specified as: response ~ treatment |
##' nuisance-formula | propensity-formula
##'
##' For example: \code{ate(y~a | x+z+a | x*z, data=...)}
##'
##' Alternatively, as a list: \code{ate(list(y~a, ~x+z, ~x*z), data=...)}
##'
##' Or using the nuisance (and propensity argument): \code{ate(y~a, nuisance=~x+z, ...)}
##' Or using the nuisance (and propensity) arguments:
##' \code{ate(y~a, nuisance=~x+z, ...)}
##' @export
##' @seealso cate
##' @author Klaus K. Holst
Expand Down Expand Up @@ -85,10 +87,8 @@ ate <- function(formula,
}
}
if (is.list(formula) || inherits(formula, "formula")) {
## xf <- lapply(xf, function(x) { environment(x) <- baseenv(); return(x) })
xx <- lapply(xf, function(x) model.matrix(x, data=data))
yx <- model.frame(xf[[1]], data=data)
## }
a <- yx[, exposure]
y <- yx[, yf[1]]
x1 <- xx[[2]]
Expand All @@ -100,24 +100,30 @@ ate <- function(formula,
} else {
stop("Expected a formula") # or a matrix (response,exposure)")
}
if (base::missing(weights) || length(weights)==0) weights <- rep(1, length(y))
if (base::missing(weights) || length(weights) == 0) {
weights <- rep(1, length(y))
}
if (inherits(weights, "formula")) weights <- all.vars(weights)
if (is.character(weights)) weights <- as.vector(data[, weights])

if (base::missing(offset) || length(offset)==0) offset <- rep(0, length(y))
if (inherits(offset, "formula")) offset <- all.vars(offset)
if (is.character(offset)) offset <- as.vector(data[, offset])
##l1 <- glm.fit(y=y, x=x1, weights=weights, family=family)
l1 <- glm(y ~ -1+x1, weights=weights, offset=offset, family=family)
beta <- coef(l1)
names(beta) <- gsub("^x1", "" ,names(beta))
## iid.beta <- fast_iid(y, l1$fitted, x1, weights, logistic=family$family=="binomial")/length(y)
names(beta) <- gsub("^x1", "", names(beta))
iid.beta <- IC(l1)/length(y)
treatments <- if (is.factor(a)) levels(a) else sort(unique(a))
if (length(treatments)>20) stop("Unexpected large amount of treatments")
if (base::missing(all)) all <- length(treatments)==2
if (length(treatments)>2) {
if (!is.factor(a)) warning("`", exposure, "` should probably be converted into a factor. An additive model is now assumed in the outcome regression model.")
if (!is.factor(a)) {
warning(
"`", exposure,
"` should probably be converted into a factor. ",
"An additive model is now assumed in the outcome regression model."
)
}
}
est <- c()
iids <- matrix(nrow=length(y), ncol=length(treatments))
Expand All @@ -131,16 +137,21 @@ ate <- function(formula,
count <- count+1
a0 <- (a==trt)
## For simplicity we here fit a logistic regression for each treatment.
## TODO: we do not need to recalculate logistic regression for the last treatment.
## TODO: we do not need to recalculate logistic regression for the last
## treatment.
l2 <- glm.fit(y=a0, x=x2, weights=weights, family=binomial("logit"))
gamma <- l2$coef
data0[, exposure] <- factor(trt, levels=treatments)
x1 <- model.matrix(xf[[2]], data=data0)
val <- ace_est(y=cbind(y), a=cbind(a0), x1=cbind(x1), x2=cbind(x2),
theta=c(beta, gamma), weights=weights, offset=offset, link=family$link)
val <- ace_est(
y = cbind(y), a = cbind(a0), x1 = cbind(x1), x2 = cbind(x2),
theta = c(beta, gamma), weights = weights,
offset = offset, link = family$link
)
alpha.index <- 1
beta.index <- seq_along(beta) + length(alpha.index)
gamma.index <- seq_along(gamma) + length(alpha.index) + length(beta.index)
gamma.index <- seq_along(gamma) +
length(alpha.index) + length(beta.index)
U0 <- val$u
DU <- t(val$du)
iid.gamma <- fast_iid(a0, l2$fitted, x2, weights)/length(a0)
Expand Down Expand Up @@ -171,9 +182,14 @@ ate <- function(formula,
est$IC <- iids * NROW(iids)
rownames(est$IC) <- rownames(data)
structure(list(estimate=est,
outcome.reg=outreg, propensity.model=propmod, names=unlist(nn)[1:2],
outcome.reg=outreg,
propensity.model=propmod,
names=unlist(nn)[1:2],
formula=xf,
npar=c(length(treatments), ncol(x1), ncol(x2)), nobs=length(y), opt=NULL,
call = cl,
npar=c(length(treatments), ncol(x1), ncol(x2)),
nobs=length(y),
opt=NULL,
all=all,
family=family),
class=c("ate.targeted", "targeted"))
Expand All @@ -187,7 +203,10 @@ print.summary.ate.targeted <- function(x, ...) {
outreg <- x$family$family
cat(" Response ", nam[[1]], " (Outcome model: ", outreg, "):\n", sep="")
cat("\t", paste(nam[[1]], x$formula[[2]]), "\n")
cat(" Exposure ", nam[[2]], " (Propensity model: logistic regression):\n", sep="")
cat(" Exposure ", nam[[2]],
" (Propensity model: logistic regression):\n",
sep = ""
)
cat("\t", paste(nam[[2]], x$formula[[3]]), "\n")
cat("\n")
if (x$all) {
Expand All @@ -209,7 +228,10 @@ print.summary.ate.targeted <- function(x, ...) {
if (length(x$contrast)>1) {
cc <- rownames(x$estimate$coefmat)
with(x, cat("\nAverage Treatment Effect (constrast: '",
cc[contrast[1]], "' - '", cc[contrast[2]], "'):\n\n", sep=""))
cc[contrast[1]], "' - '",
cc[contrast[2]], "'):\n\n",
sep = ""
))
if (!is.null(x$asso)) print(x$asso)
}
cat("\n")
Expand All @@ -227,25 +249,40 @@ summary.ate.targeted <- function(object, contrast=c(2:1), ...) {
} else {
cc <- object$estimate$coefmat
}
cc <- lava::estimate(coef=cc[, 1], vcov=diag(cc[, 2]^2, ncol=nrow(cc)), labels=rownames(cc))
cc <- lava::estimate(
coef = cc[, 1],
vcov = diag(cc[, 2]^2, ncol = nrow(cc)), labels = rownames(cc)
)
if (length(contrast)>2)
warning("Only the first two elements of 'contrast' are used")
if (object$npar[1]<2) contrast <- 1
asso <- NULL
if (length(contrast)>=2)
if (object$family$family=="binomial") {
asso <- estimate(object$estimate, function(x) c(x[contrast[1]]/x[contrast[2]],
lava::OR(x[contrast]),
x[contrast[1]]-x[contrast[2]]),
labels=c("RR", "OR", "RD"))
} else {
asso <- estimate(object$estimate, function(x) x[contrast[1]]-x[contrast[2]],
labels=c("ATE"))
}
if (length(contrast) >= 2) {
if (object$family$family == "binomial") {
asso <- estimate(object$estimate,
function(x) {
c(
x[contrast[1]] / x[contrast[2]],
lava::OR(x[contrast]),
x[contrast[1]] - x[contrast[2]]
)
},
labels = c("RR", "OR", "RD")
)
} else {
asso <- estimate(object$estimate,
function(x) x[contrast[1]] - x[contrast[2]],
labels = c("ATE")
)
}
}
structure(list(estimate=cc, npar=nn, type=object$type, asso=asso,
family=object$family,
names=c(object$names, "", "Outcome model:", "Propensity model:"),
all=object$all, formula=gsub("~", "~ ", unlist(lapply(object$formula, deparse))),
names=c(object$names, "",
"Outcome model:", "Propensity model:"),
all=object$all,
formula=gsub("~", "~ ",
unlist(lapply(object$formula, deparse))),
contrast=contrast),
class="summary.ate.targeted")
}
Loading

0 comments on commit a6e78f6

Please sign in to comment.