1 Preliminaries

We will work with the STAT data from the STAT.RData file.

load('STAT.RData')
library(phyloseq); packageVersion("phyloseq")
## [1] '1.28.0'

Remember the PCoA Results from last time

library(ade4); packageVersion("ade4")
## [1] '1.7.13'
## Compute Bray-Curtis distance matrix
bray.dist = phyloseq::distance(phy, "bray")

## PCoA results
bray.pco = dudi.pco(cailliez(bray.dist), scannf=F, nf=2)

s.class(bray.pco$li, sample_data(phy)$Location)

s.class(bray.pco$li, sample_data(phy)$Treatment)

s.class(bray.pco$li, with(sample_data(phy), 
                         interaction(Location,Treatment)))

There seems to be a big difference in terms of Location variable and slightly uncertain differences with respect to Treatment.

2 PERMANOVA

PERMANOVA is implemented in the vegan library. The function is adonis.

2.1 Vanilla

Let’s first run simple analysis with respect to Location.

library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-5
with(sample_data(phy), adonis(bray.dist ~ Location))
## 
## Call:
## adonis(formula = bray.dist ~ Location) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Location   1    4.0251  4.0251  43.899 0.31834  0.001 ***
## Residuals 94    8.6188  0.0917         0.68166           
## Total     95   12.6439                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The result is an analysis of variance table that lists pseudo-F statistic, R^2 and significance values.

We can do the same for the Treatment variable.

with(sample_data(phy), 
     adonis(formula = bray.dist ~ Treatment))
## 
## Call:
## adonis(formula = bray.dist ~ Treatment) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)    
## Treatment  4     1.521 0.38026   3.111 0.1203  0.001 ***
## Residuals 91    11.123 0.12223         0.8797           
## Total     95    12.644                 1.0000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.2 Stratified

Notice that statistically the above analyses are suboptimal because they do not take into account the fact that microbiota is measured in two different locations of the same animal. Also note that we have previously decided that Location differences are not as interesting to us, but Treatment differences are. We can account for these considerations by stratifying the permutations. What this means is that the permutations will only occur at the levels of the strata (e.g. animals, or locations), effectively removing the effect of these stratifying variables from the analysis.

Let’s first do the stratified analysis for Location variable accounting for animal from which the samples are taken. We may want to do this because we expect the microbiota to be more similar within each animal than across.

head(sample_data(phy))
##           X.SampleID BarcodeSequence LinkerPrimerSequence Treatment
## cecal_C1    cecal_C1              NA                   NA         C
## cecal_C10  cecal_C10              NA                   NA         C
## cecal_C2    cecal_C2              NA                   NA         C
## cecal_C3    cecal_C3              NA                   NA         C
## cecal_C4    cecal_C4              NA                   NA         C
## cecal_C5    cecal_C5              NA                   NA         C
##           Location         Description
## cecal_C1     cecal missing_description
## cecal_C10    cecal missing_description
## cecal_C2     cecal missing_description
## cecal_C3     cecal missing_description
## cecal_C4     cecal missing_description
## cecal_C5     cecal missing_description

Since the animal identified is not coded we need to parse it out of sample.names. Note that each sample is “coded” as location_animal.

sample_data(phy)$Mouse = sapply(strsplit(sample_names(phy), split = "_"), "[", 2)
head(sample_data(phy))
##           X.SampleID BarcodeSequence LinkerPrimerSequence Treatment
## cecal_C1    cecal_C1              NA                   NA         C
## cecal_C10  cecal_C10              NA                   NA         C
## cecal_C2    cecal_C2              NA                   NA         C
## cecal_C3    cecal_C3              NA                   NA         C
## cecal_C4    cecal_C4              NA                   NA         C
## cecal_C5    cecal_C5              NA                   NA         C
##           Location         Description Mouse
## cecal_C1     cecal missing_description    C1
## cecal_C10    cecal missing_description   C10
## cecal_C2     cecal missing_description    C2
## cecal_C3     cecal missing_description    C3
## cecal_C4     cecal missing_description    C4
## cecal_C5     cecal missing_description    C5

Now we can perform the analysis stratified on the newly created Mouse variable.

with(sample_data(phy), adonis(bray.dist ~ Location, strata = Mouse))
## 
## Call:
## adonis(formula = bray.dist ~ Location, strata = Mouse) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Location   1    4.0251  4.0251  43.899 0.31834  0.001 ***
## Residuals 94    8.6188  0.0917         0.68166           
## Total     95   12.6439                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Compare these results with what we saw before.

As before we can visualize these results using PCoA with within class analysis.

mouse.wca = wca(bray.pco, factor(sample_data(phy)$Mouse), scannf=F, nf=2)

s.class(mouse.wca$li, sample_data(phy)$Location)

Also note what the effect of wca is on the class factor.

par(mfrow=c(1,2))
s.class(bray.pco$li, factor(sample_data(phy)$Mouse), sub = "Vanilla")
s.class(mouse.wca$li, factor(sample_data(phy)$Mouse), sub = "With stratification")

Similar analysis can be done for the Treatment variable accounting for the Location differences.

with(sample_data(phy), adonis(bray.dist ~ Treatment, strata = Location))
## 
## Call:
## adonis(formula = bray.dist ~ Treatment, strata = Location) 
## 
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model     R2 Pr(>F)    
## Treatment  4     1.521 0.38026   3.111 0.1203  0.001 ***
## Residuals 91    11.123 0.12223         0.8797           
## Total     95    12.644                 1.0000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
location.wca = wca(bray.pco, sample_data(phy)$Location, scannf=F, nf=2)
s.class(location.wca$li, sample_data(phy)$Treatment)

Notice the variability is widely different across the treatment groups. PERMANOVA may give you the wrong result. We’ll see if that is really a problem and if we can fix it

3 Wd*

source("Wd.R")
with(sample_data(phy), 
     WdS.test(dm = bray.dist, 
              f = Treatment, 
              strata = Location, nrep=999))
## $p.value
## [1] 0.001
## 
## $statistic
## [1] 2.881407
## 
## $nrep
## [1] 999

Analysis with robust Wd* confirms significance of the treatment variable. Now we can use pairwise post-hoc tests to determine which groups are different.

All-vs-all post-hoc tests

with(sample_data(phy),
     Tw2.posthoc.tests(dm = bray.dist,
                       f = Treatment,
                       nrep = 999,
                       strata = Location))
##       Level1 Level2 N1 N2 p.value tw2.stat nrep
##  [1,] "C"    "P"    20 19 0.001   2.333737 999 
##  [2,] "C"    "T"    20 19 0.001   4.241121 999 
##  [3,] "C"    "V"    20 20 0.001   2.452149 999 
##  [4,] "C"    "VP"   20 18 0.001   2.973487 999 
##  [5,] "P"    "T"    19 19 0.001   3.904795 999 
##  [6,] "P"    "V"    19 20 0.001   1.557866 999 
##  [7,] "P"    "VP"   19 18 0.001   3.621718 999 
##  [8,] "T"    "V"    19 20 0.001   2.988084 999 
##  [9,] "T"    "VP"   19 18 0.001   3.043546 999 
## [10,] "V"    "VP"   20 18 0.001   3.426994 999

One-vs-all post-hoc tests

with(sample_data(phy),
     Tw2.posthoc.1vsAll.tests(dm = bray.dist,
                              f = Treatment,
                              nrep = 999,
                              strata = Location))
##    N1 N2 p.value tw2.stat nrep
## C  76 20 0.001   3.04144  999 
## P  77 19 0.001   2.635272 999 
## T  77 19 0.001   3.789716 999 
## V  76 20 0.001   2.143916 999 
## VP 78 18 0.001   3.341587 999

We next perform the same analyses stratified by sampling location.

3.1 Cecal samples

Omnibus test

cecal = subset_samples(phy, Location == 'cecal')
cecal.dist = phyloseq::distance(cecal, method = "bray")

with(sample_data(cecal), 
     WdS.test(dm = cecal.dist,
              f = Treatment,
              nrep=999))
## $p.value
## [1] 0.001
## 
## $statistic
## [1] 3.37582
## 
## $nrep
## [1] 999

All-vs-all post-hoc tests

with(sample_data(cecal),
     Tw2.posthoc.tests(dm = cecal.dist,
                       f = Treatment,
                       nrep = 999,
                       strata = Location))
##       Level1 Level2 N1 N2 p.value tw2.stat nrep
##  [1,] "C"    "P"    10 10 0.001   3.9715   999 
##  [2,] "C"    "T"    10 10 0.001   2.837393 999 
##  [3,] "C"    "V"    10 10 0.001   3.924879 999 
##  [4,] "C"    "VP"   10 10 0.003   3.388221 999 
##  [5,] "P"    "T"    10 10 0.001   3.08591  999 
##  [6,] "P"    "V"    10 10 0.001   3.312034 999 
##  [7,] "P"    "VP"   10 10 0.001   5.470093 999 
##  [8,] "T"    "V"    10 10 0.002   2.369085 999 
##  [9,] "T"    "VP"   10 10 0.015   2.247615 999 
## [10,] "V"    "VP"   10 10 0.001   4.434411 999

One-vs-all post-hoc tests

with(sample_data(cecal),
     Tw2.posthoc.1vsAll.tests(dm = cecal.dist,
                              f = Treatment,
                              nrep = 999,
                              strata = Location))
##    N1 N2 p.value tw2.stat nrep
## C  40 10 0.001   3.390979 999 
## P  40 10 0.001   4.420166 999 
## T  40 10 0.004   2.285291 999 
## V  40 10 0.001   3.232865 999 
## VP 40 10 0.001   3.674069 999

3.2 Fecal samples

Omnibus test

fecal = subset_samples(phy, Location == 'fecal')
fecal.dist = phyloseq::distance(fecal, method = "bray")

with(sample_data(fecal), 
     WdS.test(dm = fecal.dist,
              f = Treatment,
              nrep=999))
## $p.value
## [1] 0.001
## 
## $statistic
## [1] 3.280539
## 
## $nrep
## [1] 999

All-vs-all post-hoc tests

with(sample_data(fecal),
     Tw2.posthoc.tests(dm = fecal.dist,
                       f = Treatment,
                       nrep = 999,
                       strata = Location))
##       Level1 Level2 N1 N2 p.value tw2.stat nrep
##  [1,] "C"    "P"    10 9  0.014   2.52079  999 
##  [2,] "C"    "T"    10 9  0.001   6.123267 999 
##  [3,] "C"    "V"    10 10 0.007   2.629472 999 
##  [4,] "C"    "VP"   10 8  0.004   3.189398 999 
##  [5,] "P"    "T"    9  9  0.002   4.855915 999 
##  [6,] "P"    "V"    9  10 0.084   1.772628 999 
##  [7,] "P"    "VP"   9  8  0.006   3.099817 999 
##  [8,] "T"    "V"    9  10 0.001   6.03463  999 
##  [9,] "T"    "VP"   9  8  0.007   2.998603 999 
## [10,] "V"    "VP"   10 8  0.001   3.998944 999

One-vs-all post-hoc tests

with(sample_data(fecal),
     Tw2.posthoc.1vsAll.tests(dm = fecal.dist,
                              f = Treatment,
                              nrep = 999,
                              strata = Location))
##    N1 N2 p.value tw2.stat nrep
## C  36 10 0.002   3.565188 999 
## P  37 9  0.017   2.343418 999 
## T  37 9  0.001   5.573077 999 
## V  36 10 0.002   3.641632 999 
## VP 38 8  0.011   2.753417 999