install.packages("INLA", repos=c(getOption("repos"), INLA="https://inla.r-inla-download.org/R/testing"), dep=TRUE)
install.packages("rjags")
install.packages("coda")
install.packages("eha")
install.packages("survival")
library(INLA)
library(rjags)
library(coda)
library(eha)
library(survival)
In this lab, we will conduct an analysis using Bayesian logistic and survival regression models estimated with INLA. We will use data from the Western Collaborative Group Study (WCGS), a study of the association between cardiovascular health and behavioral pattern conducted in a cohort of male volunteers. Subjects were recruited in 1960-1961 and followed for up to 9 years for onset of coronary heart disease (CHD). The scientific question of interest is whether behavioral pattern is associated with CHD. Specifically, investigators hypothesized that men with a “Type A” behavioral pattern would be more likely to experience CHD. Because this is an observational study design it is important to account for confounding due to many factors such as cigarette smoking and elevated BMI.
We will use the following variables from this data set:
age Age: age in years
behpat Behavior pattern: (A1, A2, B3, B4)
bmi Body mass index
chd Indicator of CHD at any time during follow-up: 0 = no; 1 = yes
chd01 Coronary heart disease within 5 years: 0 = no; 1 = yes
chol Cholesterol: mg/100 ml
dbp Diastolic blood pressure: mm Hg
dibpat Dichotomous behavior pattern: 0 = Type B; 1 = Type A
height Height: height in inches
id Subject ID
ncigs Smoking: Cigarettes/day
sbp Systolic blood pressure: mm Hg
time Time in days from baseline to onset of CHD
smoke: No = non-smoker; Yes = current smoker
weight Weight: pounds
You can download the data file and read it into R as follows:
wcgs <- read.csv("https://raw.githubusercontent.com/rhubb/SISCR2017/master/data/wcgs.csv", header = T)
wcgs$time <- wcgs$time/365
We will start by using logistic regression to analyze the association between behavioral pattern and onset of CHD within 5 years of baseline (chd01). Begin by conducting an exploratory data analysis to summarize the distribution of CHD, behavioral pattern, and possible confounders included in the data set. What conclusions do you reach regarding the role that number of cigarettes, BMI, and age may play in the analysis of the association between CHD and behavioral pattern?
#-- univariate tables for categorical variables
table(wcgs$chd01)/sum(table(wcgs$chd01))
table(wcgs$behpat)/sum(table(wcgs$behpat))
table(wcgs$smoke)/sum(table(wcgs$smoke))
#-- bivariate tables for categorical predictors and CHD
# CHD and behavioral pattern
table(wcgs$behpat,wcgs$chd01)
t(sweep(table(wcgs$chd01,wcgs$behpat),2,rowSums(table(wcgs$behpat,wcgs$chd01)),"/"))
# CHD and smoking
table(wcgs$smoke,wcgs$chd01)
t(sweep(table(wcgs$chd01,wcgs$smoke),2,rowSums(table(wcgs$smoke,wcgs$chd01)),"/"))
# Behavioral pattern and smoking
table(wcgs$behpat,wcgs$smoke)
t(sweep(table(wcgs$smoke,wcgs$behpat),2,rowSums(table(wcgs$behpat,wcgs$smoke)),"/"))
#-- summary statistics for continuous variables by CHD
# Age
tapply(wcgs$age,wcgs$chd01,mean)
tapply(wcgs$age,wcgs$chd01,sd)
boxplot(wcgs$age ~ wcgs$chd01, xlab = "CHD", ylab = "Age (years)")
# BMI
tapply(wcgs$bmi,wcgs$chd01,mean)
tapply(wcgs$bmi,wcgs$chd01,sd)
boxplot(wcgs$bmi ~ wcgs$chd01, xlab = "CHD", ylab = "BMI")
# Number of cigarettes
tapply(wcgs$ncigs,wcgs$chd01,mean)
tapply(wcgs$ncigs,wcgs$chd01,sd)
boxplot(wcgs$ncigs ~ wcgs$chd01, xlab = "CHD", ylab = "Number of Cigarettes")
Next, use Bayesian logistic regression to analyze the association between CHD and behavioral pattern, accounting for possible confounders based on your results from (1). We will explore results using several prior distributions. For each prior distribution can you think of a context in which this prior would be preferred? Compare your results to a classical logistic regression. How does the interpretation of the results differ for the Bayesian GLM compared to the frequentist GLM?
Normal(0,10) priors
# -- Normal priors for regression coefficients (with mean=0 and scale=10)
chd.n10 <- inla(chd01~ factor(behpat) + smoke + age, data=wcgs, family = "binomial",
control.fixed=list(mean.intercept=c(0),prec.intercept=c(1/10),mean=c(0,0),prec=rep(1/10,2)))
chd.n10$summary.fix
# -- Plot posterior densities
plot(chd.n10, plot.prior = TRUE)
Normal(0,0.1) priors
# -- Normal priors for regression coefficients (with mean=0 and scale=0.1), N(0,10) prior for intercept
chd.n01 <- inla(chd01~ factor(behpat) + smoke + age, data=wcgs, family = "binomial",
control.fixed=list(mean.intercept=c(0),prec.intercept=c(1/10),mean=rep(0,5),prec=rep(10,5)))
chd.n01$summary.fix
# -- Plot posterior densities
plot(chd.n01, plot.prior = TRUE)
Classical logistic regression
chd.glm1 <- glm(chd01~ factor(behpat) + smoke + age, data=wcgs, family=binomial)
summary(chd.glm1)
Using one of the Bayesian models you fit in (2), interpret your results. What do you conclude about the association between behavioral pattern and CHD? Does your choice of prior affect your conclusions? How does the interpretation of results for the Bayesian GLM differ from the results of the frequentist GLM?
# -- Exponentiate results to obtain odds ratios
exp(chd.n10$summary.fix)
Since some individuals were censored prior to the end of follow-up, a more appropriate way to analyze these data is with survival analysis. Analyze the association between time to onset of CHD and behavioral pattern, adjusting for the same confounders used in your logistic regression model using:
Cox proportional hazards model
chd.cph <- coxph(Surv(time, chd) ~ factor(behpat) + smoke + age, data=wcgs)
summary(chd.cph)
Parametric survival regression
chd.weib <- phreg(Surv(time, chd) ~ factor(behpat) + smoke + age, data=wcgs, dist = "weibull")
summary(chd.weib)
Bayesian non-parametric survival model
chd.np <- inla(inla.surv(time, chd) ~ factor(behpat) + factor(smoke) + age, family="coxph",data=wcgs,control.hazard=list(model="rw1", n.intervals=10))
summary(chd.np)
exp(chd.np$summary.fix)
# plot baseline hazard function
plot(chd.np$summary.random$baseline.hazard[,"ID"],
exp(chd.np$summary.fixed[1,1]+chd.np$summary.random$baseline.hazard[,"mean"]), type="S",
xlab = "time", ylab = "Baseline hazard")
Bayesian parametric survival model
chd.weib <- inla(inla.surv(time, chd) ~ factor(behpat) + factor(smoke) + age, family="weibullsurv",data=wcgs)
summary(chd.weib)
exp(chd.weib$summary.fix)
What do you conclude about the relationship between behavioral pattern and hazard of CHD? Are your conclusions affected by which estimation approach you chose? If so, which one would you prefer in this context and why?
In this lab, we will conduct a meta-analysis of 28 studies investigating the effect of interventions designed to reduce cholesterol on ischemic heart disease (IHD). The outcome of interest in these studies (IHD) was occurrence of fatal or non-fatal myocardial infarction.
We will use the following variables from this data set:
id Trial id
cholreduc Average cholesterol reduction in treated group - average reduction in control grup (mmol/l)
Y Number of IHD events
N Total number of participants
Trt Treatment group: 1 = Intervention, 0 = Control
You can download the data file and read it into R as follows:
chol <- read.csv("https://raw.githubusercontent.com/rhubb/SISCR2017/master/data/cholesterol.csv", header = T)
Meta-analysis ignoring between-trial heterogeneity
ma1 = inla(Y~ factor(Trt), data=chol, Ntrials=N, family="binomial")
summary(ma1)
# posterior median odds ratio
exp(ma1$summary.fixed[2,4])
Meta-analysis ignoring between-trial heterogeneity accounting for reductions in cholesterol level
# created centered cholesterol reduction variable
chol$chol.cent <- chol$ cholreduc-mean(chol$ cholreduc)
ma2 = inla(Y~ factor(Trt) + chol.cent, data=chol, Ntrials=N, family="binomial")
summary(ma2)
# posterior median odds ratio
exp(ma2$summary.fixed[2,4])
Meta-analysis accounting for between trial heterogeneity via fixed-effects
ma3 = inla(Y~ -1 + factor(id)+ factor(Trt), data=chol, Ntrials=N, family="binomial")
summary(ma3)
# posterior median odds ratio
exp(ma3$summary.fixed[29,4])
Meta-analysis accounting for between trial heterogeneity via random-effects
ma4 = inla(Y~ factor(Trt) + f(id, model = "iid", param = c(0.001,0.001)), data=chol, Ntrials=N, family="binomial")
summary(ma4)
# posterior median odds ratio
exp(ma4$summary.fixed[2,4])
Meta-analysis accounting for between trial heterogeneity via random-effects and cholesterol reduction
ma5 = inla(Y~ factor(Trt) + chol.cent + f(id, model = "iid", param = c(0.001,0.001)), data=chol, Ntrials=N, family="binomial")
summary(ma5)
# posterior median odds ratio
exp(ma5$summary.fixed[2,4])
Meta-analysis accounting for between trial heterogeneity via random-effects and allowing for moderation by cholesterol reduction
ma6 = inla(Y~ factor(Trt)*chol.cent + f(id, model = "iid", param = c(0.001,0.001)), data=chol, Ntrials=N, family="binomial")
summary(ma6)
# posterior median odds ratio at mean cholesterol reduction
exp(ma6$summary.fixed[2,4])
# effect of 1 mmol/l greater reduction in cholesterol on posterior median odds ratio
exp(ma6$summary.fixed[4,4])