## ----SLIDE 2----------------------------------------------------- # these packages are needed to execute the demo pkgs <- c("ltmle","SuperLearner") # see what packages are currently installed installed_pacakges <- row.names(installed.packages()) # loop over the needed packages for(p in pkgs){ # check if package is installed already_installed <- p %in% installed_pacakges # if not already installed, install it if(!already_installed){ install.packages(p) } # and load package library(p, character.only = TRUE) } ## ----SLIDE 3----------------------------------------------------- # set a seed for reproducibility set.seed(212) n <- 5000 A0 <- rbinom(n, size = 1, p = 0.5) L1 <- rnorm(n, mean = A0 + 1, sd = 1) A1 <- rbinom(n, size = 1, p = plogis(-1 + L1 + A0)) Y <- rnorm(n, mean = 1 + A1 + 2 * L1, 1) ## ----SLIDE 4----------------------------------------------------- # fit a regression of Y ~ A1 + L1 + A0 fit <- glm(Y ~ A1 + L1 + A0) # show results fit ## ----SLIDE 9----------------------------------------------------- # full data.frame full_data <- data.frame(A0 = A0, L1 = L1, A1 = A1, Y = Y) # subset data to observations with A0 = 1 & A1 = 1 data_11 <- subset(full_data, A0 == 1 & A1 == 1) # fit regression of Y ~ L1 fit_11 <- glm(Y ~ L1, data = data_11) fit_11 ## ----SLIDE 11----------------------------------------------------- betas11 <- round(as.numeric(fit_11$coef),2) ## ----SLIDE 12----------------------------------------------------- # get predicted value for everyone full_data$Q2n_11 <- predict(fit_11, newdata = full_data) # subset data to observations with A0 = 1 data_1 <- subset(full_data, A0 == 1) # fit regression fit_1 <- glm(Q2n_11 ~ 1, data = data_1) # intercept is estimate of E[Y(1,1)] fit_1 ## ----SLIDE 13----------------------------------------------------- # subset data to observations with A0 = a0 & A1 = a1 # fit regression of Y ~ L1 in A0/A1 subset data # get predicted value for everyone # subset data to observations with A0 = a0 # fit intercept-only regression in A0 subset data # intercept is estimate of E[Y(a0,a1)] ## ----SLIDE 13----------------------------------------------------- cfmean_gcomp <- function(a0, a1, full_data){ # subset data to observations with A0 = a0 & A1 = a1 data_a0a1 <- subset(full_data, A0 == a0 & A1 == a1) # fit regression of Y ~ L1 in A0/A1 subset data fit_a0a1 <- glm(Y ~ L1, data = data_a0a1) # get predicted value for everyone full_data$Q2n_a0a1 <- predict(fit_a0a1, newdata = full_data) # subset data to observations with A0 = a0 data_a0 <- subset(full_data, A0 == a0) # fit intercept-only regression in A0 subset data fit_a0 <- glm(Q2n_a0a1 ~ 1, data = data_a0) # intercept is estimate of E[Y(a0,a1)] return(as.numeric(fit_a0$coefficients)) } # evaluate the function EY11_gcomp <- cfmean_gcomp(a0 = 1, a1 = 1, full_data) EY10_gcomp <- cfmean_gcomp(a0 = 1, a1 = 0, full_data) EY01_gcomp <- cfmean_gcomp(a0 = 0, a1 = 1, full_data) EY00_gcomp <- cfmean_gcomp(a0 = 0, a1 = 0, full_data) ## ----SLIDE 14----------------------------------------------------- # should be ~ 6, 5, 4, 3 round(c(EY11_gcomp, EY10_gcomp, EY01_gcomp, EY00_gcomp), 2) ## ----SLIDE 16----------------------------------------------------- cfmean_iptw <- function(a0, a1, full_data){ # subset data to observations with A0 = a0 data_a0 <- subset(full_data, A0 == a0) # fit logistic regression of I(A1 = a1) ~ L1 in a0 subset ps_a1 <- glm(I(A1 == a1) ~ L1, data = data_a0, family = binomial()) # get predicted value for everybody full_data$phat_a1 <- predict(ps_a1, newdata = full_data, type = 'response') # fit regression of I(A0 = a0) ~ 1 in full_data ps_a0 <- glm(I(A0 == a0) ~ 1, data = full_data, family = binomial()) # get predicted value for everybody full_data$phat_a0 <- predict(ps_a0, newdata = full_data, type = 'response') # compute iptw estimator EYa0a1 <- with(full_data, mean( as.numeric(A0 == a0) * as.numeric(A1 == a1) / (phat_a0 * phat_a1) * Y )) # intercept is estimate of E[Y(a0,a1)] return(EYa0a1) } ## ----SLIDE 17----------------------------------------------------- # evaluate the function EY11_iptw <- cfmean_iptw(a0 = 1, a1 = 1, full_data) EY10_iptw <- cfmean_iptw(a0 = 1, a1 = 0, full_data) EY01_iptw <- cfmean_iptw(a0 = 0, a1 = 1, full_data) EY00_iptw <- cfmean_iptw(a0 = 0, a1 = 0, full_data) # should be ~ 6,5,4,3 round(c(EY11_iptw, EY10_iptw, EY01_iptw, EY00_iptw),2) ## ----SLIDE 19----------------------------------------------------- # set seed for reproducibility & set sample size of 500 set.seed(212); n <- 500 # baseline variables L0 <- data.frame(L01 = rnorm(n), L02 = rbinom(n, 1, 0.5)) # first treatment gA0 <- plogis(0.2 * L0$L01 - 0.2 * L0$L02) A0 <- rbinom(n = n, size = 1, prob = gA0) # intermediate variable at time 1 L1 <- rnorm(n = n, mean = -A0 + L0$L01 - L0$L02, sd = 1) # second treatment decision gA1 <- plogis(0.2 * A0 - L1 + L0$L01) A1 <- rbinom(n = n, size = 1, prob = gA1) # intermediate variable at time 2 L2 <- rnorm(n = n, mean = -A0*A1 + 2*A1 - L0$L01 + L1, sd = 2) # third treatment decision gA2 <- plogis(A0 - A1 + 2*A0*A1 - L0$L01 + 0.2 * L1*L0$L02) A2 <- rbinom(n = n, size = 1, prob = gA2) # outcome Y <- rnorm(n = n, mean = L0$L01 * L0$L02 * L2 - A0 - A1 - A2*A0*L2, sd = 2) # put into a data frame full_data <- data.frame(L0, A0 = A0, L1 = L1, A1 = A1, L2 = L2, A2 = A2, Y = Y) ## ----SLIDE 20----------------------------------------------------- head(full_data) ## ----SLIDE 22----------------------------------------------------- set.seed(123) ltmle_fit1 <- ltmle( data = full_data, Anodes = c("A0", "A1", "A2"), Lnodes = c("L01","L02","L1","L2"), Ynodes = "Y", SL.library = list(Q = c("SL.earth", "SL.glm", "SL.mean"), g = c("SL.earth", "SL.glm", "SL.mean")), stratify = FALSE, abar = list(treatment = c(1,1,1), control = c(0,0,0)) ) ## ----SLIDE 25----------------------------------------------------- summary(ltmle_fit1) ## ----SLIDE 27----------------------------------------------------- # weights for outcome regressions, because we set stratify = FALSE, the output in # ltmle_fit1$fit$Q[[1]] is the same as in ltmle_fit1$fit$Q[[2]] ltmle_fit1$fit$Q[[1]] ## ----SLIDE 28----------------------------------------------------- # weights for propensity scores, because we set stratify = FALSE, the output in # ltmle_fit1$fit$g[[1]] is the same as in ltmle_fit1$fit$g[[2]] ltmle_fit1$fit$g[[1]] ## ----SLIDE 32----------------------------------------------------- set.seed(12) # censoring prior to time 1 (1 = censored) gC1 <- plogis(-2 + 0.05 * L0$L01) C1 <- rbinom(n = n, size = 1, prob = gC1) # censoring prior to time 2 (1 = censored) gC2 <- plogis(-3 + 0.05 * A0 + 0.025 * L1 - 0.025 * L0$L02) C2 <- rbinom(n = n, size = 1, prob = gC2) # censoring prior to time 3 (1 = censored) gC3 <- plogis(-3.5 + 0.05*A0*A1 - 0.025*L2 + 0.025 * L1) C3 <- rbinom(n = n, size = 1, prob = gC3) # make a cumulative indicator of censoring anyC1 <- C1 == 1; anyC2 <- C1 == 1 | C2 == 1 anyC3 <- C1 == 1 | C2 == 1 | C3 == 1 # censored data set cens_data <- data.frame(L0, A0 = A0, C1 = BinaryToCensoring(is.censored = C1), L1 = ifelse(anyC1, NA, L1), A1 = ifelse(anyC1, NA, A1), C2 = BinaryToCensoring(is.censored = ifelse(anyC1, NA, C2)), L2 = ifelse(anyC2, NA, L2), A2 = ifelse(anyC2, NA, A2), C3 = BinaryToCensoring(is.censored = ifelse(anyC2, NA, C3)), Y = ifelse(anyC3, NA, Y)) ## ----SLIDE 33----------------------------------------------------- head(cens_data, 9) ## ----SLIDE 34----------------------------------------------------- set.seed(123) ltmle_fit2 <- ltmle( data = cens_data, Anodes = c("A0", "A1", "A2"), Lnodes = c("L01","L02","L1","L2"), Cnodes = c("C1","C2","C3"), Ynodes = "Y", SL.library = list(Q = c("SL.earth", "SL.glm", "SL.mean"), g = c("SL.earth", "SL.glm", "SL.mean")), stratify = FALSE, abar = list(treatment = c(1,1,1), control = c(0,0,0)) ) ## ----SLIDE 35----------------------------------------------------- summary(ltmle_fit2) ## ----SLIDE 38----------------------------------------------------- rule1 <- function(pt_data){ # all patients start on control A0 <- 0 # patients get treatment at time 1 if L1 > -1 # set patients with missing L1 to NA if(!is.na(pt_data$L1)){ A1 <- ifelse(pt_data$L1 > -1, 1, 0) }else{ A1 <- NA } # patients get treatment at time 2 if L2 > -1 # set patients with missing L2 to NA if(!is.na(pt_data$L1)){ A2 <- ifelse(pt_data$L2 > -1, 1, 0) }else{ A2 <- NA } return(c(A0,A1,A2)) } ## ----SLIDE 39----------------------------------------------------- rule2 <- function(pt_data){ # all patients start on control A0 <- 0 # and stay on control unless censored A1 <- ifelse(is.na(pt_data$L1), NA, 0) A2 <- ifelse(is.na(pt_data$L2), NA, 0) return(c(A0,A1,A2)) } ## ----SLIDE 40----------------------------------------------------- set.seed(123) ltmle_fit3 <- ltmle( data = cens_data, Anodes = c("A0", "A1", "A2"), Lnodes = c("L01","L02","L1","L2"), Cnodes = c("C1","C2","C3"), Ynodes = "Y", stratify = FALSE, SL.library = list(Q = c("SL.earth", "SL.glm", "SL.mean"), g = c("SL.earth", "SL.glm", "SL.mean")), rule = list(treatment = rule1, control = rule2) ) ## ----SLIDE 41----------------------------------------------------- summary(ltmle_fit3)