## ----SLIDE 2----------------------------------------------------- # these packages are needed to execute the demo pkgs <- c("drtmle","earth","SuperLearner","nloptr", "quadprog","plotmo","plotrix","TeachingDemos") # see what packages are currently installed installed_pacakges <- row.names(installed.packages()) # loop over the needed packages for(p in pkgs){ # check if package is installed already_installed <- p %in% installed_pacakges # if not already installed, install it if(!already_installed){ install.packages(p) } # and load package library(p, character.only = TRUE) } ## ----SLIDE 3----------------------------------------------------- # set a seed for reproducibility set.seed(212) # sample size n <- 300 # W1 has Normal distribution, W2 has Uniform distribution W1 <- rnorm(n); W2 <- runif(n) # make a data.frame of covariates W <- data.frame(W1 = W1, W2 = W2) # pr(A = 1 | W) is logistic linear in W g1W <- plogis(-1 + W1 - W2 + W1*W2) # generate binary treatment A <- rbinom(n, 1, g1W) # E[Y | A, W] is logistic linear in A, W QAW <- plogis(W1 - W2 + A) # generate outcome by adding random error Y <- rbinom(n, 1, QAW) ## ----SLIDE 6----------------------------------------------------- set.seed(123) fit1 <- drtmle(W = W, A = A, Y = Y, a_0 = c(0,1), family = binomial(), SL_g = c("SL.earth", "SL.glm"), SL_Q = c("SL.earth", "SL.glm"), SL_gr = c("SL.earth", "SL.glm"), SL_Qr = c("SL.earth", "SL.glm"), stratify = FALSE) fit1 ## ----SLIDE 6----------------------------------------------------- ci(fit1) ## ----SLIDE 7----------------------------------------------------- ci(fit1, contrast = c(-1, 1)) ## ----SLIDE 12----------------------------------------------------- riskRatio <- list(f = function(eff){ log(eff) }, f_inv = function(eff){ exp(eff) }, h = function(est){ est[1]/est[2] }, fh_grad = function(est){ c(1/est[1],-1/est[2]) }) ci(fit1, contrast = riskRatio) ## ----SLIDE 13----------------------------------------------------- logitMean <- list(f = function(eff){ qlogis(eff) }, f_inv = function(eff){ plogis(eff) }, h = function(est){ est[1] }, fh_grad = function(est){ c(1/(est[1] - est[1]^2), 0) }) ci(fit1, contrast = logitMean) ## ----SLIDE 14----------------------------------------------------- wald_test(fit1, null = c(0.5, 0.6)) ## ----SLIDE 15----------------------------------------------------- # test ATE = 0 wald_test(fit1, contrast = c(1, -1)) # test ATE = 0.1 wald_test(fit1, contrast = c(1, -1), null = 0.1) ## ----SLIDE 16----------------------------------------------------- wald_test(fit1, contrast = riskRatio, null = 1) ## ----SLIDE 20----------------------------------------------------- screen_confounders <- function(Y, X, family, trt_name = "A", change = 0.1, ...){ # fit treatment only model & get coefficient for treatment fit_init <- glm(as.formula(paste0("Y ~ ", trt_name)), data = X, family = family) trt_coef <- fit_init$coef[2] # identify which column of X is the trt variable trt_col <- which(colnames(X) == trt_name) # set all variables except trt to not be included initially include <- rep(FALSE, ncol(X)); include[trt_col] <- TRUE # loop over variables in X other than trt for(j in seq_len(ncol(X))[-trt_col]){ # find variable name var_name <- colnames(X)[j] # fit trt + variable model, get trt coefficient fit <- glm(as.formula(paste0("Y ~ ", trt_name, "+", var_name)), data = X, family = family) new_trt_coef <- fit$coef[2] # check if changed more than specified amount include[j] <- abs((new_trt_coef - trt_coef)/trt_coef) > change } return(include) } ## ----SLIDE 21----------------------------------------------------- SL.screened_regression <- function(Y, X, newX, family, ...){ # screen columns of X using screen_confounders include_cols <- screen_confounders(Y = Y, X = X, family = family) # fit main terms glm with only those columns fitted_glm <- glm(Y ~ ., data = X[ , include_cols], family = family) # get predictions pred <- predict(fitted_glm, newdata = newX[ , include_cols], type = "response") # format output out <- list(fit = list(fitted_model = fitted_glm, include_cols = include_cols), pred = pred) # assign class class(out$fit) <- "SL.screened_regression" return(out) } ## ----SLIDE 22----------------------------------------------------- predict.SL.screened_regression <- function(object, newdata, ...){ pred <- predict(object$fitted_model, newdata = newdata[ , object$include_cols], type = "response") return(pred) } ## ----SLIDE 23----------------------------------------------------- set.seed(123) fit2 <- drtmle(W = W, A = A, Y = Y, a_0 = c(0,1), family = gaussian(), # specify main terms logistic regression via glm_g glm_g = "W1 + W2", # specify custom outcome regression via SL_Qs SL_Q = "SL.screened_regression", # the residual regression stay the same SL_gr = c("SL.earth", "SL.glm", "SL.mean"), SL_Qr = c("SL.earth", "SL.glm", "SL.mean"), stratify = FALSE) fit2 ## ----SLIDE 24----------------------------------------------------- set.seed(123) DeltaA <- rbinom(n, 1, plogis(2 + W$W1)) DeltaY <- rbinom(n, 1, plogis(2 + W$W2 + A)) A[DeltaA == 0] <- NA Y[DeltaY == 0] <- NA ## ----SLIDE 25----------------------------------------------------- set.seed(123) fit3 <- drtmle(W = W, A = A, Y = Y, a_0 = c(0,1), family = binomial(), SL_g = list(DeltaA = c("SL.earth", "SL.glm", "SL.mean"), A = c("SL.earth", "SL.glm", "SL.mean"), DeltaY = c("SL.glm", "SL.mean")), SL_Q = c("SL.earth", "SL.glm", "SL.mean"), SL_gr = c("SL.earth", "SL.glm", "SL.mean"), SL_Qr = c("SL.earth", "SL.glm", "SL.mean"), stratify = FALSE) ## ----SLIDE 26----------------------------------------------------- fit3 # calls to ci and wald_test are same as before ci(fit3) ## ----SLIDE 28----------------------------------------------------- set.seed(1234) n <- 300 W <- data.frame(W1 = runif(n), W2 = rbinom(n, 1, 0.5)) A <- rbinom(n, 2, plogis(W$W1 + W$W2)) Y <- rbinom(n, 1, plogis(W$W1 + W$W2*A)) fit4 <- drtmle(W = W, A = A, Y = Y, stratify = FALSE, SL_Q = c("SL.earth", "SL.glm"), SL_g = c("SL.earth", "SL.glm"), SL_Qr = c("SL.earth", "SL.glm"), SL_gr = c("SL.earth", "SL.glm"), family = binomial(), a_0 = c(0,1,2)) ## ----SLIDE 29----------------------------------------------------- fit4 ## ----SLIDE 30----------------------------------------------------- ci(fit4) wald_test(fit4, null = c(0.4, 0.5, 0.6)) ## ----SLIDE 31----------------------------------------------------- ci(fit4, contrast = c(-1, 1, 0)) ci(fit4, contrast = c(-1, 0, 1)) ## ----SLIDE 32----------------------------------------------------- riskRatio_1v0 <- list(f = function(eff){ log(eff) }, f_inv = function(eff){ exp(eff) }, h = function(est){ est[2]/est[1] }, fh_grad = function(est){ c(1/est[2], -1/est[1], 0) }) ci(fit4, contrast = riskRatio_1v0) ## ----SLIDE 33----------------------------------------------------- riskRatio_2v0 <- list(f = function(eff){ log(eff) }, f_inv = function(eff){ exp(eff) }, # note now comparing 3rd to 1st estimate h = function(est){ est[3]/est[1] }, fh_grad = function(est){ c(0, -1/est[1], 1/est[3]) }) ci(fit4, contrast = riskRatio_2v0)