Introduction to Basic XGR Use

  • eXploring Genomic Relations (XGR)
  • Used for:
    • Enrichment Analysis
    • Similarity Analysis
    • Identifying Gene Subnetworks

Setup

Install and Load XGR

  • First Install Package using the BiocManager Package (Once Per Computer)
  • This takes a little bit of time, so it would be best to install this before the lesson!
  • Follow the prompts in the command line, and do not update packages when asked (This saves install time)
  • XGR is not available on CRAN; instead, it is installed from the XGR GitHub repository using the BiocManager package (with the remotes package)
# if(!("BiocManager" %in% rownames(installed.packages()))) install.packages("BiocManager")
# BiocManager::install("remotes", dependencies=T)
# BiocManager::install("hfang-bristol/XGR", dependencies=T)
## reload the installed package

library(XGR)
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## Loading required package: dnet
## Loading required package: supraHex
## Loading required package: hexbin
## Loading required package: ggplot2

Load Additional Packages used

  • tidyverse - Data Cleaning and Plotting Tools
  • kableExtra - Nicely formatted tables for R Markdown
  • RColorBrewer - Color Palettes for Plots
#install.packages('tidyverse')
#install.packages('kableExtra')
#install.packages('RColorBrewer')

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ tibble  3.1.6     ✔ dplyr   1.0.8
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ✔ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## ✖ purrr::compose()       masks igraph::compose()
## ✖ tidyr::crossing()      masks igraph::crossing()
## ✖ dplyr::filter()        masks stats::filter()
## ✖ dplyr::groups()        masks igraph::groups()
## ✖ dplyr::lag()           masks stats::lag()
## ✖ purrr::simplify()      masks igraph::simplify()
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(RColorBrewer)

Enrichment Analysis

Load Data

  • This data is a public dataset from Fairfax et al. 2014, where monocytes are exposed to interferon gamma for 24 hours, LPS for 24 hours, and LPS for 2 hours
  • The data is located at the link below
  • XGR has built in functions to load in data
  • The rest of this code chunk extracts the differentially expressed genes (DEGs) for the three treatment groups
# Remote directory from which XGR downloads its .RData files.
RData.location <- "http://galahad.well.ox.ac.uk/bigdata"

# Fetch the 'JKscience_TS1A' table from that location.
res <- xRDataLoader(
  RData.customised = "JKscience_TS1A",
  RData.location = RData.location
)
## Start at 2022-07-26 21:54:27
## 
## 'JKscience_TS1A' (from http://galahad.well.ox.ac.uk/bigdata/JKscience_TS1A.RData) has been loaded into the working environment (at 2022-07-26 21:54:28)
## 
## End at 2022-07-26 21:54:28
## Runtime in total is: 1 secs
# Every gene symbol present in the loaded table.
background <- res$Symbol

# Each subset below keeps rows with a negative log fold change and FDR < 0.01,
# together with the symbol and the two statistics for that contrast.
# NOTE(review): "induced" is selected with logFC < 0 -- presumably the fold
# change is coded so that negative values mean higher expression after
# treatment; confirm against the dataset description.

# Genes significantly induced by IFN24
flag <- with(res, logFC_INF24_Naive < 0 & fdr_INF24_Naive < 0.01)
df_IFN24 <- res[flag, c("Symbol", "logFC_INF24_Naive", "fdr_INF24_Naive")]

# Genes significantly induced by LPS24
flag <- with(res, logFC_LPS24_Naive < 0 & fdr_LPS24_Naive < 0.01)
df_LPS24 <- res[flag, c("Symbol", "logFC_LPS24_Naive", "fdr_LPS24_Naive")]

# Genes significantly induced by LPS2
flag <- with(res, logFC_LPS2_Naive < 0 & fdr_LPS2_Naive < 0.01)
df_LPS2 <- res[flag, c("Symbol", "logFC_LPS2_Naive", "fdr_LPS2_Naive")]
  • This code block changes our data into a usable format
# Give the three tables identical column names (L2FC, FDR) and tag each row
# with the contrast it came from, so they can be stacked.
df_IFN24 <- df_IFN24 %>%
  dplyr::rename(L2FC = logFC_INF24_Naive, FDR = fdr_INF24_Naive) %>%
  dplyr::mutate(Treatment = "IFN24 vs. Control")

df_LPS24 <- df_LPS24 %>%
  dplyr::rename(L2FC = logFC_LPS24_Naive, FDR = fdr_LPS24_Naive) %>%
  dplyr::mutate(Treatment = "LPS24 vs. Control")

df_LPS2 <- df_LPS2 %>%
  dplyr::rename(L2FC = logFC_LPS2_Naive, FDR = fdr_LPS2_Naive) %>%
  dplyr::mutate(Treatment = "LPS2 vs. Control")

# One long table holding all three contrasts.
contrasts <- dplyr::bind_rows(df_IFN24, df_LPS24, df_LPS2)
# Show the first six rows, ordered by ascending FDR, as a styled table.
contrasts %>%
  arrange(FDR) %>%
  head() %>%
  kable(booktabs = TRUE, caption = "All Contrasts Dataset") %>%
  kable_styling(
    full_width = TRUE,
    bootstrap_options = c("striped", "hover"),
    font_size = 9
  )
All Contrasts Dataset
Symbol L2FC FDR Treatment
STX11 -1.81 0 IFN24 vs. Control
CUL1 -1.69 0 IFN24 vs. Control
ANKRD22 -5.92 0 IFN24 vs. Control
PSME1 -1.43 0 IFN24 vs. Control
TRANK1 -1.55 0 IFN24 vs. Control
GCH1 -4.06 0 IFN24 vs. Control
# Distinct contrast labels, in order of first appearance in `contrasts`.
mycontrasts <- unique(contrasts[["Treatment"]])

print(mycontrasts)
## [1] "IFN24 vs. Control" "LPS24 vs. Control" "LPS2 vs. Control"
# Upper-cased, de-duplicated symbols of the genes in `contrasts`.
# NOTE(review): this overwrites the earlier `background` (all symbols in
# `res`) with only the genes that passed the filters above, so the enrichment
# universe is restricted to those genes -- confirm this is intended.
background <- contrasts$Symbol %>%
  toupper() %>%
  unique()

tail(background)
## [1] "AK057196" "NEUROG2"  "BX106374" "VN1R5"    "CPLX2"    "AW296529"

Generate XGR Objects

#' Run an XGR gene-set enrichment for a single contrast
#'
#' Picks the genes of one contrast that pass the significance and effect-size
#' filters, upper-cases their symbols, and tests them for enrichment with
#' `xEnricherGenes()`.
#'
#' @param data Data frame with the columns `Symbol`, `FDR`, `L2FC` and
#'   `Treatment`.
#' @param contrast Value of `Treatment` identifying the contrast to analyse.
#' @param mygo Ontology identifier, forwarded to `xEnricherGenes()` as
#'   `ontology` (e.g. "MsigdbH").
#' @param tree Single logical. `FALSE` (default) runs the enrichment with
#'   `ontology.algorithm = "none"` and then passes the result through
#'   `xEnrichConciser()`; `TRUE` uses `ontology.algorithm = "lea"` and skips
#'   that step.
#' @param l2fc Genes are kept only when `abs(L2FC)` is strictly greater than
#'   this value.
#' @param background Gene symbols forwarded to `xEnricherGenes()` as
#'   `background`.
#' @return The enrichment result. If `xEnrichConciser()` fails, a warning is
#'   raised and the un-concised result of `xEnricherGenes()` is returned.
Generate_XGR_list <- function(data, contrast, mygo, tree = FALSE, l2fc,
                              background) {
  stopifnot(is.logical(tree), length(tree) == 1L, !is.na(tree))

  # Genes of interest: FDR < 0.1 and |L2FC| above the cut-off in this contrast.
  symbols <- data %>%
    dplyr::filter(FDR < 0.1, abs(L2FC) > l2fc, Treatment == contrast) %>%
    dplyr::pull(Symbol) %>%
    toupper()

  enrichment <- xEnricherGenes(
    data = symbols,
    ontology = mygo,
    background = background,
    ontology.algorithm = if (tree) "lea" else "none"
  )

  # Return explicitly: previously the function ended on an `if` without
  # `else`, so `tree = TRUE` silently returned NULL.
  if (tree) {
    return(enrichment)
  }

  # Best effort: keep the full term list rather than handing a "try-error"
  # object to downstream plotting functions.
  tryCatch(
    xEnrichConciser(enrichment),
    error = function(e) {
      warning(
        "xEnrichConciser() failed; returning un-concised result: ",
        conditionMessage(e),
        call. = FALSE
      )
      enrichment
    }
  )
}
# Run the "MsigdbH" enrichment once for each of the first three contrasts.
# l2fc = 0 means any non-zero fold change passes the effect-size filter.
xgr_list <- lapply(
  mycontrasts[1:3],
  function(one_contrast) {
    Generate_XGR_list(
      contrast = one_contrast,
      mygo = "MsigdbH",
      data = contrasts,
      l2fc = 0,
      background = background
    )
  }
)
## Start at 2022-07-26 21:54:28
## 
## Load the ontology MsigdbH and its gene annotations (2022-07-26 21:54:28) ...
## Start at 2022-07-26 21:54:28
## 
## 'org.Hs.egMsigdbH' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.egMsigdbH.RData) has been loaded into the working environment (at 2022-07-26 21:54:29)
## 
## End at 2022-07-26 21:54:29
## Runtime in total is: 1 secs
## Do gene mapping from Symbols to EntrezIDs (2022-07-26 21:54:29) ...
## Start at 2022-07-26 21:54:29
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:30)
## 
## End at 2022-07-26 21:54:30
## Runtime in total is: 1 secs
## human organism (2022-07-26 21:54:30)
## Among 4754 symbols of input data, there are 3886 mappable via official gene symbols but 868 left unmappable
## Start at 2022-07-26 21:54:30
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:32)
## 
## End at 2022-07-26 21:54:32
## Runtime in total is: 2 secs
## human organism (2022-07-26 21:54:32)
## Among 8688 symbols of input data, there are 6933 mappable via official gene symbols but 1755 left unmappable
## 
## #######################################################
## 'xEnricher' is being called (2022-07-26 21:54:32):
## #######################################################
## First, generate a subgraph induced (via 'all_paths' mode) by the annotation data (2022-07-26 21:54:32) ...
## Next, prepare enrichment analysis (2022-07-26 21:54:32) ...
##  There are 3886 genes/SNPs of interest tested against 6933 genes/SNPs as the background (annotatable only? FALSE) (2022-07-26 21:54:32)
## Third, perform enrichment analysis using 'fisher' test (2022-07-26 21:54:32) ...
##  There are 49 terms being used, each restricted within [10,2000] annotations
## Last, adjust the p-values for 48 terms (with 5 minimum overlaps) using the BH method (2022-07-26 21:54:32) ...
## Start at 2022-07-26 21:54:32
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:34)
## 
## End at 2022-07-26 21:54:34
## Runtime in total is: 2 secs
## #######################################################
## 'xEnricher' has been finished (2022-07-26 21:54:34)!
## #######################################################
## 
## End at 2022-07-26 21:54:34
## Runtime in total (xEnricherGenes): 6 secs
##  Among 48 terms, there are 48 non-redundant terms
## Start at 2022-07-26 21:54:35
## 
## Load the ontology MsigdbH and its gene annotations (2022-07-26 21:54:35) ...
## Start at 2022-07-26 21:54:35
## 
## 'org.Hs.egMsigdbH' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.egMsigdbH.RData) has been loaded into the working environment (at 2022-07-26 21:54:35)
## 
## End at 2022-07-26 21:54:35
## Runtime in total is: 0 secs
## Do gene mapping from Symbols to EntrezIDs (2022-07-26 21:54:35) ...
## Start at 2022-07-26 21:54:35
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:36)
## 
## End at 2022-07-26 21:54:36
## Runtime in total is: 1 secs
## human organism (2022-07-26 21:54:36)
## Among 5019 symbols of input data, there are 4117 mappable via official gene symbols but 902 left unmappable
## Start at 2022-07-26 21:54:36
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:38)
## 
## End at 2022-07-26 21:54:38
## Runtime in total is: 2 secs
## human organism (2022-07-26 21:54:38)
## Among 8688 symbols of input data, there are 6933 mappable via official gene symbols but 1755 left unmappable
## 
## #######################################################
## 'xEnricher' is being called (2022-07-26 21:54:38):
## #######################################################
## First, generate a subgraph induced (via 'all_paths' mode) by the annotation data (2022-07-26 21:54:38) ...
## Next, prepare enrichment analysis (2022-07-26 21:54:38) ...
##  There are 4117 genes/SNPs of interest tested against 6933 genes/SNPs as the background (annotatable only? FALSE) (2022-07-26 21:54:38)
## Third, perform enrichment analysis using 'fisher' test (2022-07-26 21:54:38) ...
##  There are 49 terms being used, each restricted within [10,2000] annotations
## Last, adjust the p-values for 48 terms (with 5 minimum overlaps) using the BH method (2022-07-26 21:54:38) ...
## Start at 2022-07-26 21:54:38
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:40)
## 
## End at 2022-07-26 21:54:40
## Runtime in total is: 2 secs
## #######################################################
## 'xEnricher' has been finished (2022-07-26 21:54:41)!
## #######################################################
## 
## End at 2022-07-26 21:54:41
## Runtime in total (xEnricherGenes): 6 secs
##  Among 48 terms, there are 48 non-redundant terms
## Start at 2022-07-26 21:54:41
## 
## Load the ontology MsigdbH and its gene annotations (2022-07-26 21:54:41) ...
## Start at 2022-07-26 21:54:41
## 
## 'org.Hs.egMsigdbH' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.egMsigdbH.RData) has been loaded into the working environment (at 2022-07-26 21:54:41)
## 
## End at 2022-07-26 21:54:41
## Runtime in total is: 0 secs
## Do gene mapping from Symbols to EntrezIDs (2022-07-26 21:54:41) ...
## Start at 2022-07-26 21:54:41
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:42)
## 
## End at 2022-07-26 21:54:42
## Runtime in total is: 1 secs
## human organism (2022-07-26 21:54:42)
## Among 4741 symbols of input data, there are 3519 mappable via official gene symbols but 1222 left unmappable
## Start at 2022-07-26 21:54:42
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:44)
## 
## End at 2022-07-26 21:54:44
## Runtime in total is: 2 secs
## human organism (2022-07-26 21:54:44)
## Among 8688 symbols of input data, there are 6933 mappable via official gene symbols but 1755 left unmappable
## 
## #######################################################
## 'xEnricher' is being called (2022-07-26 21:54:44):
## #######################################################
## First, generate a subgraph induced (via 'all_paths' mode) by the annotation data (2022-07-26 21:54:44) ...
## Next, prepare enrichment analysis (2022-07-26 21:54:44) ...
##  There are 3519 genes/SNPs of interest tested against 6933 genes/SNPs as the background (annotatable only? FALSE) (2022-07-26 21:54:44)
## Third, perform enrichment analysis using 'fisher' test (2022-07-26 21:54:44) ...
##  There are 49 terms being used, each restricted within [10,2000] annotations
## Last, adjust the p-values for 49 terms (with 5 minimum overlaps) using the BH method (2022-07-26 21:54:44) ...
## Start at 2022-07-26 21:54:44
## 
## 'org.Hs.eg' (from http://galahad.well.ox.ac.uk/bigdata/org.Hs.eg.RData) has been loaded into the working environment (at 2022-07-26 21:54:46)
## 
## End at 2022-07-26 21:54:46
## Runtime in total is: 2 secs
## #######################################################
## 'xEnricher' has been finished (2022-07-26 21:54:47)!
## #######################################################
## 
## End at 2022-07-26 21:54:47
## Runtime in total (xEnricherGenes): 6 secs
##  Among 49 terms, there are 49 non-redundant terms
# Label each enrichment result with the contrast it belongs to.
xgr_list <- setNames(xgr_list, mycontrasts)

Pathway Comparison Plot

 # Side-by-side comparison of the enrichment results, displayed by FDR.
 p <- xEnrichCompare(
   xgr_list,
   displayBy = "fdr",
   FDR.cutoff = 0.1,
   wrap.width = 45
 )
 # Add the colour palette and the title as separate layers.
 p <- p + scale_fill_brewer(palette = "Set2")
 p <- p + ggtitle("Hallmark Pathway Enrichments (FDR < 0.1)")

 p

Single Comparison Plot

# Barplot of the top 10 terms for the first contrast, displayed by fold change.
first_result <- xgr_list[[1]]
one_contrast_plot <- xEnrichBarplot(
  first_result,
  top_num = 10,
  displayBy = "fc"
)
one_contrast_plot
## Warning: Position guide is perpendicular to the intended axis. Did you mean to
## specify a different guide `position`?

Create XGR Dataset

# Tabulate up to 250 terms for the first contrast (sorted by adjusted
# p-value), tag them with the contrast name, and display the first row.
xEnrichViewer(
  xgr_list[[1]],
  top_num = 250,
  sortBy = "adjp",
  details = TRUE
) %>%
  mutate(Contrast = names(xgr_list)[1]) %>%
  head(1) %>%
  kable(booktabs = TRUE, caption = "xEnrichViewer Output") %>%
  kable_styling(
    full_width = TRUE,
    bootstrap_options = c("striped", "hover"),
    font_size = 9
  )
xEnrichViewer Output
name nAnno nOverlap fc zscore pvalue adjp or CIl CIu distance namespace members_Overlap members_Anno Contrast
HALLMARK_INTERFERON_GAMMA_RESPONSE Genes up-regulated in response to IFNG [GeneID=3458]. 177 164 1.65 9.94 0 0 10.3 5.83 19.8 H ADAR, APOL6, ARID5B, AUTS2, BANK1, BATF2, BPGM, BST2, C1R, C1S, CASP1, CASP3, CASP4, CASP7, CCL2, CCL7, CD274, CD38, CD40, CD69, CD74, CD86, CDKN1A, CFB, CFH, CIITA, CMKLR1, CMPK2, CSF2RB, CXCL10, CXCL9, DDX58, DDX60, DHX58, EIF2AK2, EIF4E3, EPSTI1, FAS, FGL2, FPR1, GBP4, GBP6, GCH1, GPR18, GZMA, HERC6, HLA-B, HLA-DRB1, ICAM1, IDO1, IFI27, IFI30, IFI35, IFI44, IFI44L, IFIH1, IFIT1, IFIT3, IFITM2, IFNAR2, IL10RA, IL15, IL15RA, IL18BP, IL2RB, IL4R, IL7, IRF1, IRF2, IRF4, IRF7, IRF8, IRF9, ISG15, ISG20, ISOC1, ITGB7, JAK2, LAP3, LATS2, LGALS3BP, LY6E, LYSMD2, MARCH1, METTL7B, MT2A, MTHFD2, MVP, MX1, MX2, MYD88, NAMPT, NFKB1, NLRC5, NMI, NOD1, NUP93, OAS2, OAS3, OASL, OGFR, P2RY14, PARP12, PARP14, PDE4B, PIM1, PLA2G4A, PLSCR1, PML, PSMA2, PSMA3, PSMB10, PSMB2, PSMB8, PSMB9, PSME1, PTGS2, PTPN1, PTPN2, PTPN6, RBCK1, RIPK1, RIPK2, RNF213, RNF31, RSAD2, SAMD9L, SAMHD1, SECTM1, SERPING1, SLAMF7, SLC25A28, SOCS1, SOCS3, SOD2, SP110, SPPL2A, SRI, SSPN, ST3GAL5, STAT1, STAT2, STAT3, STAT4, TAP1, TAPBP, TDRD7, TNFAIP3, TNFAIP6, TNFSF10, TOR1B, TRAFD1, TRIM14, TRIM21, TRIM26, UBE2L6, UPP1, VAMP5, VAMP8, VCAM1, WARS, XAF1, ZBP1, ZNFX1 ADAR, APOL6, ARID5B, ARL4A, AUTS2, BANK1, BATF2, BPGM, BST2, BTG1, C1R, C1S, CASP1, CASP3, CASP4, CASP7, CASP8, CCL2, CCL5, CCL7, CD274, CD38, CD40, CD69, CD74, CD86, CDKN1A, CFB, CFH, CIITA, CMKLR1, CMPK2, CSF2RB, CXCL10, CXCL9, DDX58, DDX60, DHX58, EIF2AK2, EIF4E3, EPSTI1, FAS, FGL2, FPR1, GBP4, GBP6, GCH1, GPR18, GZMA, HERC6, HIF1A, HLA-B, HLA-DRB1, ICAM1, IDO1, IFI27, IFI30, IFI35, IFI44, IFI44L, IFIH1, IFIT1, IFIT3, IFITM2, IFNAR2, IL10RA, IL15, IL15RA, IL18BP, IL2RB, IL4R, IL6, IL7, IRF1, IRF2, IRF4, IRF7, IRF8, IRF9, ISG15, ISG20, ISOC1, ITGB7, JAK2, LAP3, LATS2, LGALS3BP, LY6E, LYSMD2, MARCH1, METTL7B, MT2A, MTHFD2, MVP, MX1, MX2, MYD88, NAMPT, NCOA3, NFKB1, NFKBIA, NLRC5, NMI, NOD1, NUP93, OAS2, OAS3, OASL, OGFR, 
P2RY14, PARP12, PARP14, PDE4B, PELI1, PIM1, PLA2G4A, PLSCR1, PML, PNP, PSMA2, PSMA3, PSMB10, PSMB2, PSMB8, PSMB9, PSME1, PTGS2, PTPN1, PTPN2, PTPN6, RAPGEF6, RBCK1, RIPK1, RIPK2, RNF213, RNF31, RSAD2, SAMD9L, SAMHD1, SECTM1, SERPING1, SLAMF7, SLC25A28, SOCS1, SOCS3, SOD2, SP110, SPPL2A, SRI, SSPN, ST3GAL5, ST8SIA4, STAT1, STAT2, STAT3, STAT4, TAP1, TAPBP, TDRD7, TNFAIP3, TNFAIP6, TNFSF10, TOR1B, TRAFD1, TRIM14, TRIM21, TRIM25, TRIM26, UBE2L6, UPP1, VAMP5, VAMP8, VCAM1, WARS, XAF1, ZBP1, ZNFX1 IFN24 vs. Control