## Author: Eric Morenz ## Date: 2020/07/23 ## Fitting parametric models, plotting different summaries, kernel methods library(dplyr) ## If you get an error. Please run: install.packages("&package_name") library(readxl) ## To read .xls files library(survival) ## Classic and well known library(flexsurv) ## mostly an extension of survival package. library(biostat3) ### One package we can use for kernel estimate of hazard ## library(kernhaz) ### Another package for kernel estimates of the hazard setwd(here::here()) ### Reading into memory part of the data df <- read_excel("PS_Datasets/pbc.xls") %>% select(N, X, D, Z1) %>% rename(ID = N, Time = X, Death = D, Trt = Z1) %>% filter(ID < 200) #### Creating a survival object. This is one way to do it. surv_obj <- df %>% with(Surv(time=Time, event=Death, type="right")) #### Same Kaplan-Meier code as before but with two groups survfit_obj <- survfit(surv_obj ~ Trt, data=df) ## ## Kaplan-Meier plot without CIs plot(survfit_obj, col=c( 'pink', 'blue'), conf.int=FALSE, lty=c("solid", "dashed"), main="Kaplan-Meier Survival Estimate", ylab="Survival Probability", xlab="Time") legend("topright", c("Treatment Group 1", "Treatment Group 2"), col=c("pink", "blue"), lty=c("solid", "solid"), cex=0.9, bty="n") #################### What about the cumulative hazard ######### plot(survfit_obj, fun="cumhaz", col=c( 'pink', 'blue'), lwd=2, xlab="Days", ylab="Cumulative Hazard", main="Nelson−Aalen cumulative hazard estimates") ### Without a legend ################## ################## ################## ########### Let's fit a few parametric models ########## ################## ################## ################## exponential_fit <- flexsurvreg(surv_obj ~ 1, dist="exponential") ## from flexsurv weibull_fit <- flexsurvreg(surv_obj ~ 1, dist="weibull") ## splinefit <- flexsurvspline(surv_obj ~ 1) ## flexsurv has a spline fit aswell ########### Let's compare the model fit for the survival function plot(survfit(surv_obj ~ 1, data=df), conf.int=FALSE, xlab="Time", ylab="Survival probability", col="black", lty="solid", lwd=2) ## nonparametric lines(exponential_fit, ci=FALSE, col="darkgreen", lty="dotted", lwd=2) lines(weibull_fit, ci=FALSE, col="orange", lty="dashed", lwd=2) legend(x=3300, y=1, bty="n", c("Nonparametric", "Exponential", "Weibull"), col=c("black", "darkgreen", "orange"), lty=c("solid", "dotted", "dashed"), lwd=rep(3, 4), cex=0.9) ########## For parametric models we can plot the hazard estimates ########## This is not possible for the nonparametric KM estimator (why?) ########## We will use smoothing techniques to get an estimate of the hazard fxn ### Ignoring groups survfit_obj2 <- survfit(surv_obj ~ 1, data=df) kernelfit_haz_bias <- smoothHaz(survfit_obj2, n.grid=3) ## randomly chosen kernelfit_haz_variance <- smoothHaz(survfit_obj2, n.grid=50) ## randomly chosen ### Tuning of the bandiwdth is a little beyond the course. ### But I would be happy to discuss techniques in Slack ### We now plot the 'nonparametric' estimate of the hazard with parametric hazards time_seq <- seq(1, 5000, by=10) shape <- exp(weibull_fit$coef[1]) scale <- exp(weibull_fit$coef[2]) plot(kernelfit_haz_variance)### Plot of the hazard with respect to the epanechnikov kernel lines(kernelfit_haz_bias, lwd=3) lines(time_seq, rep(exp(exponential_fit$coeff), length(time_seq)), col='darkgreen', lty="dotted", lwd=2) ## exponential lines(time_seq, hweibull(time_seq, shape, scale, log = FALSE), col='orange', lty="dashed", lwd=2) ## weibull legend(x=0, y=0.00038, bty="n", lwd=c(3,1,1,1), c("Larger Bias", "Larger Variance", "Exponential", "Weibull"), col=c("black", "black", "darkgreen", "orange"), lty=c("solid", "solid", "dotted", "dashed"), cex=0.9)