## fit_mechanisms_rule_ate.R

## Declare names that are only resolved through non-standard evaluation so that
## R CMD check does not flag them as undefined globals. `A` is used as a column
## name inside data.table `:=` expressions in this file.
## NOTE(review): `..w_names` is not referenced in the visible part of this file
## -- presumably used elsewhere or a leftover; confirm before removing.
utils::globalVariables(c("..w_names", "A"))

#' Fit a binomial regression of the rule-class indicator on covariates
#'
#' Trains \code{learners} on an \pkg{sl3} task whose outcome is the column
#' \code{Dclass} (declared as binomial), whose covariates are \code{w_names},
#' and whose observation weights are read from the column \code{obs_weights}.
#' Predictions are returned exactly as produced by the fitted learner; no
#' bounding or truncation is applied.
#'
#' @param train_data A \code{data.table} containing the observed data used to
#'  train the learners. It must include the columns \code{Dclass},
#'  \code{obs_weights}, and every column named in \code{w_names}.
#' @param valid_data A holdout data set, with columns exactly matching those
#'  appearing in the preceding argument \code{train_data}, to be used for
#'  estimation via cross-fitting. Optional, defaulting to \code{NULL}.
#' @param learners \code{\link[sl3]{Stack}}, or other learner class (inheriting
#'  from \code{\link[sl3]{Lrnr_base}}), containing a set of learners from
#'  \pkg{sl3}, to be used in fitting the regression of \code{Dclass} on the
#'  baseline covariates.
#' @param w_names A \code{character} vector of the names of the columns that
#'  correspond to baseline covariates (W). The input for this argument is
#'  automatically generated by \code{\link{medoutcon}}.
#'
#' @return A \code{list}. When \code{valid_data} is \code{NULL} it holds
#'  \code{treat_est}, a one-column \code{data.table} named \code{treat_pred}
#'  with predictions on the training task, and \code{treat_fit}, the trained
#'  learner. Otherwise it holds \code{treat_est_train} and
#'  \code{treat_est_valid} (predictions for \code{train_data} and
#'  \code{valid_data}, in the same one-column format) and \code{treat_fit}.
#'
#' @importFrom data.table as.data.table copy setnames ":="
#' @importFrom sl3 sl3_Task
#'
#' @keywords internal
fit_ruleclass_mech <- function(train_data,
                               valid_data = NULL,
                               learners,
                               w_names) {
  ## build the regression task for a given data set
  make_task <- function(data) {
    sl3::sl3_Task$new(
      data = data,
      weights = "obs_weights",
      covariates = w_names,
      outcome = "Dclass",
      outcome_type = "binomial"
    )
  }

  ## wrap raw predictions in a data.table with the single column "treat_pred"
  as_pred_table <- function(pred) {
    pred_dt <- data.table::as.data.table(pred)
    data.table::setnames(pred_dt, "treat_pred")
    pred_dt
  }

  ## fit the regression on the training data
  treat_fit <- learners$train(make_task(train_data))

  if (is.null(valid_data)) {
    ## no holdout set: report predictions on the training task itself; this
    ## prediction is only computed here, where it is actually used
    out <- list(
      treat_est = as_pred_table(treat_fit$predict()),
      treat_fit = treat_fit
    )
  } else {
    ## cross-fitting: predict on freshly built tasks for both data sets;
    ## predictions are passed through unmodified (no bounding is applied)
    treat_est <- lapply(
      list(train_data, valid_data),
      function(data) as_pred_table(treat_fit$predict(make_task(data)))
    )
    out <- list(
      treat_est_train = treat_est[[1]],
      treat_est_valid = treat_est[[2]],
      treat_fit = treat_fit
    )
  }
  return(out)
}


#' Fit a continuous regression of the column D1 on covariates
#'
#' Trains \code{learners} on an \pkg{sl3} task whose outcome is the column
#' \code{D1} (declared as continuous), whose covariates are \code{w_names},
#' and whose observation weights are read from the column \code{obs_weights}.
#' Predictions are returned exactly as produced by the fitted learner; no
#' bounding or truncation is applied.
#'
#' @param train_data A \code{data.table} containing the observed data used to
#'  train the learners. It must include the columns \code{D1},
#'  \code{obs_weights}, and every column named in \code{w_names}.
#' @param valid_data A holdout data set, with columns exactly matching those
#'  of \code{train_data}, to be used for estimation via cross-fitting.
#'  Optional, defaulting to \code{NULL}.
#' @param learners \code{\link[sl3]{Stack}}, or other learner class (inheriting
#'  from \code{\link[sl3]{Lrnr_base}}), containing a set of learners from
#'  \pkg{sl3}, to be used in fitting the regression of \code{D1} on the
#'  baseline covariates.
#' @param w_names A \code{character} vector of the names of the columns that
#'  correspond to baseline covariates (W).
#'
#' @return A \code{list}. When \code{valid_data} is \code{NULL} it holds
#'  \code{treat_est}, a one-column \code{data.table} named \code{treat_pred}
#'  with predictions on the training task, and \code{treat_fit}, the trained
#'  learner. Otherwise it holds \code{treat_est_train} and
#'  \code{treat_est_valid} (predictions for \code{train_data} and
#'  \code{valid_data}, in the same one-column format) and \code{treat_fit}.
#'
#' @importFrom data.table as.data.table setnames
#' @importFrom sl3 sl3_Task
#'
#' @keywords internal
fit_rule_mech <- function(train_data,
                          valid_data = NULL,
                          learners,
                          w_names) {
  ## build the regression task for a given data set
  make_task <- function(data) {
    sl3::sl3_Task$new(
      data = data,
      weights = "obs_weights",
      covariates = w_names,
      outcome = "D1",
      outcome_type = "continuous"
    )
  }

  ## wrap raw predictions in a data.table with the single column "treat_pred"
  as_pred_table <- function(pred) {
    pred_dt <- data.table::as.data.table(pred)
    data.table::setnames(pred_dt, "treat_pred")
    pred_dt
  }

  ## fit the regression on the training data
  treat_fit <- learners$train(make_task(train_data))

  if (is.null(valid_data)) {
    ## no holdout set: report predictions on the training task itself; this
    ## prediction is only computed here, where it is actually used
    out <- list(
      treat_est = as_pred_table(treat_fit$predict()),
      treat_fit = treat_fit
    )
  } else {
    ## cross-fitting: predict on freshly built tasks for both data sets;
    ## predictions are passed through unmodified (no bounding is applied)
    treat_est <- lapply(
      list(train_data, valid_data),
      function(data) as_pred_table(treat_fit$predict(make_task(data)))
    )
    out <- list(
      treat_est_train = treat_est[[1]],
      treat_est_valid = treat_est[[2]],
      treat_fit = treat_fit
    )
  }
  return(out)
}

#' Fit propensity scores for treatment contrasts
#'
#' Trains \code{learners} on an \pkg{sl3} task whose outcome is the column
#' \code{A} (declared as binomial), whose covariates are \code{w_names}, and
#' whose observation weights are read from the column \code{obs_weights}. With
#' \code{p} denoting the fitted prediction, each row is assigned
#' \code{C1 * p + (1 - C1) * (1 - p)}, i.e., \code{p} where the column
#' \code{C1} equals 1 and \code{1 - p} where it equals 0. The result is then
#' passed through \code{bound_precision()} and \code{bound_propensity()}
#' (helpers that are not defined in this file).
#'
#' @param train_data A \code{data.table} containing the observed data used to
#'  train the learners. It must include the columns \code{A}, \code{C1},
#'  \code{obs_weights}, and every column named in \code{w_names}.
#' @param valid_data A holdout data set, with columns exactly matching those
#'  of \code{train_data}, to be used for estimation via cross-fitting.
#'  Optional, defaulting to \code{NULL}.
#' @param contrast Not used by this function: the treatment level of interest
#'  is read row-wise from the column \code{C1}. The argument is kept so that
#'  existing callers that supply it continue to work.
#' @param learners \code{\link[sl3]{Stack}}, or other learner class (inheriting
#'  from \code{\link[sl3]{Lrnr_base}}), containing a set of learners from
#'  \pkg{sl3}, to be used in fitting the propensity score model, i.e.,
#'  g := P(A = 1 | W).
#' @param w_names A \code{character} vector of the names of the columns that
#'  correspond to baseline covariates (W).
#'
#' @return A \code{list}. When \code{valid_data} is \code{NULL} it holds
#'  \code{treat_est}, a one-column \code{data.table} named
#'  \code{treat_pred_A_prime}, and \code{treat_fit}, the trained learner.
#'  Otherwise it holds \code{treat_est_train} and \code{treat_est_valid}
#'  (the same one-column format, for \code{train_data} and \code{valid_data})
#'  and \code{treat_fit}.
#'
#' @importFrom data.table as.data.table setnames
#' @importFrom sl3 sl3_Task
#'
#' @keywords internal
fit_treat_mech <- function(train_data,
                           valid_data = NULL,
                           contrast,
                           learners,
                           w_names) {
  ## build the treatment mechanism task for a given data set
  make_task <- function(data) {
    sl3::sl3_Task$new(
      data = data,
      weights = "obs_weights",
      covariates = w_names,
      outcome = "A",
      outcome_type = "binomial"
    )
  }

  ## turn predictions for A into the probability of the level flagged by C1,
  ## bound the result, and return it as a one-column data.table
  contrast_estimate <- function(data, treat_pred) {
    ## `[[` extracts the column as a plain vector for both data.table and
    ## data.frame input, so the arithmetic below is vector arithmetic in
    ## every branch
    c1 <- data[["C1"]]
    treat_pred_A_prime <- c1 * treat_pred + (1 - c1) * (1 - treat_pred)

    ## bounding to numerical precision and for positivity considerations
    x_precise <- bound_precision(treat_pred_A_prime)
    treat_est <- data.table::as.data.table(bound_propensity(x_precise))
    data.table::setnames(treat_est, "treat_pred_A_prime")
    treat_est
  }

  ## fit the treatment mechanism on the training data
  treat_fit <- learners$train(make_task(train_data))

  if (is.null(valid_data)) {
    ## no holdout set: use predictions on the training task itself; this
    ## prediction is only computed here, where it is actually used
    out <- list(
      treat_est = contrast_estimate(train_data, treat_fit$predict()),
      treat_fit = treat_fit
    )
  } else {
    ## cross-fitting: predict on freshly built tasks for both data sets
    treat_est <- lapply(
      list(train_data, valid_data),
      function(data) {
        contrast_estimate(data, treat_fit$predict(make_task(data)))
      }
    )
    out <- list(
      treat_est_train = treat_est[[1]],
      treat_est_valid = treat_est[[2]],
      treat_fit = treat_fit
    )
  }
  return(out)
}

###############################################################################

#' Fit outcome regression
#'
#' Trains \code{learners} on an \pkg{sl3} task with outcome \code{Y},
#' covariates \code{c("A", w_names)}, and observation weights read from the
#' column \code{obs_weights}. Predictions are produced both under the observed
#' value of \code{A} and after overwriting \code{A} with the value stored in
#' the column \code{C1}.
#'
#' @param train_data A \code{data.table} containing the observed data used to
#'  train the learners. It must include the columns \code{Y}, \code{A},
#'  \code{C1}, \code{obs_weights}, and every column named in \code{w_names}.
#'  The input is not modified: the intervention is applied to a copy.
#' @param valid_data A holdout data set, with columns exactly matching those
#'  appearing in the preceding argument \code{train_data}, to be used for
#'  estimation via cross-fitting. Optional, defaulting to \code{NULL}.
#' @param contrast Not used by this function: the intervention value is read
#'  row-wise from the column \code{C1}. The argument is kept so that existing
#'  callers that supply it continue to work.
#' @param learners \code{\link[sl3]{Stack}}, or other learner class (inheriting
#'  from \code{\link[sl3]{Lrnr_base}}), containing a set of learners from
#'  \pkg{sl3}, to be used in fitting the outcome regression of \code{Y} on
#'  \code{A} and the baseline covariates.
#' @param w_names A \code{character} vector of the names of the columns that
#'  correspond to baseline covariates (W). The input for this argument is
#'  automatically generated by \code{\link{medoutcon}}.
#'
#' @return A \code{list}. When \code{valid_data} is \code{NULL} it holds
#'  \code{b_est}, a \code{data.table} with columns \code{b_pred_A_natural}
#'  (predictions under the observed \code{A}) and \code{b_pred_A_prime}
#'  (predictions with \code{A} set to \code{C1}), and \code{b_fit}, the
#'  trained learner. Otherwise it holds \code{b_est_train} and
#'  \code{b_est_valid} (the same two columns, for \code{train_data} and
#'  \code{valid_data}) and \code{b_fit}.
#'
#' @importFrom data.table as.data.table copy setnames ":="
#' @importFrom sl3 sl3_Task
#'
#' @keywords internal
fit_out_mech <- function(train_data,
                         valid_data = NULL,
                         contrast,
                         learners,
                         w_names) {
  est_names <- c("b_pred_A_natural", "b_pred_A_prime")

  ## build the outcome regression task for a given data set
  make_task <- function(data) {
    sl3::sl3_Task$new(
      data = data,
      weights = "obs_weights",
      covariates = c("A", w_names),
      outcome = "Y"
    )
  }

  ## fit the outcome regression and predict once on the training task, i.e.,
  ## under the observed treatment status
  b_natural_fit <- learners$train(make_task(train_data))
  b_natural_pred_train <- b_natural_fit$predict()

  ## predictions from the trained model after setting A to the value held in
  ## column C1; works on a copy because `:=` modifies a data.table in place
  predict_intervened <- function(data) {
    data_intervene <- data.table::copy(data)
    ## bind the column to a local name before using it inside `[`
    C1 <- data_intervene$C1
    data_intervene[, A := C1]
    b_natural_fit$predict(make_task(data_intervene))
  }

  if (is.null(valid_data)) {
    ## no holdout set: natural and post-intervention predictions on the
    ## training data
    b_intervened_pred_A_prime <- predict_intervened(train_data)
    out_b_est <- data.table::as.data.table(cbind(
      b_natural_pred_train,
      b_intervened_pred_A_prime
    ))
    data.table::setnames(out_b_est, est_names)

    out <- list(
      b_est = out_b_est,
      b_fit = b_natural_fit
    )
  } else {
    ## predictions on the holdout data under the observed treatment status
    b_natural_pred_valid <- b_natural_fit$predict(make_task(valid_data))

    ## post-intervention predictions for training and holdout data
    b_intervened_train <- data.table::as.data.table(
      cbind(predict_intervened(train_data))
    )
    b_intervened_valid <- data.table::as.data.table(
      cbind(predict_intervened(valid_data))
    )

    ## add natural treatment estimates to post-intervention predictions
    b_est_train <- cbind(b_natural_pred_train, b_intervened_train)
    b_est_valid <- cbind(b_natural_pred_valid, b_intervened_valid)
    data.table::setnames(b_est_train, est_names)
    data.table::setnames(b_est_valid, est_names)

    out <- list(
      b_est_train = b_est_train,
      b_est_valid = b_est_valid,
      b_fit = b_natural_fit
    )
  }
  return(out)
}