############### slide 6 ################# setwd("C:/Users/pqiu7/Desktop/SISG/Lecture 08") library(dplyr) library(Seurat) library(patchwork) library(ggplot2) raw_data <- Read10X(data.dir = "filtered_gene_bc_matrices/hg19/") obj <- CreateSeuratObject(counts = raw_data, project="pbmc3k", min.cell=3, min.features=200) raw_data[1:10, 1:5] raw_data[1:10, 1016:1020] dim(raw_data) obj sum(colSums(raw_data!=0)>=200) sum(rowSums(raw_data!=0)>=3) ############### slide 7 ################# obj@meta.data obj@meta.data$nFeature_RNA obj$nFeature_RNA obj@assays$RNA@counts[1:10,1:5] obj@assays$RNA@counts["MS4A1",1:5] obj@assays$RNA@counts[c("MS4A1","CD79B","CD79A"),1:5] ############### slide 8 ################# obj[["percent.mt"]] <- PercentageFeatureSet(obj, pattern = "^MT-") VlnPlot(obj, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) plot1 <- FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "percent.mt") plot2 <- FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") plot1 + plot2 obj <- subset(obj, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5) obj ############### slide 9 ################# obj <- NormalizeData(obj, normalization.method = "LogNormalize", scale.factor = 10000) obj@assays$RNA@data colSums(exp(obj@assays$RNA@data)-1) ############### slide 10 ################# obj <- FindVariableFeatures(obj, selection.method = "vst", nfeatures = 2000) # Identify the 10 most highly variable genes top10 <- head(VariableFeatures(obj), 10) top10 # plot variable features with and without labels plot1 <- VariableFeaturePlot(obj) plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) plot1 + plot2 ############### slide 11 ################# all.genes <- rownames(obj) length(all.genes) obj <- ScaleData(obj, features = all.genes) obj@assays$RNA@scale.data obj@assays$RNA@scale.data[1:5,1:5] row_standard_deviation = apply(obj@assays$RNA@scale.data, 1, sd) row_mean <- apply(obj@assays$RNA@scale.data, 1, mean) par(mfrow=c(1,2)) plot(row_mean,row_standard_deviation) plot(row_mean, rowMeans(obj@assays$RNA@counts!=0)) ############### slide 12 ################# obj <- RunPCA(obj, features = VariableFeatures(object = obj)) DimPlot(obj, reduction = "pca") # plot(obj@reductions$pca@cell.embeddings[,1], obj@reductions$pca@cell.embeddings[,2]) VizDimLoadings(obj, dims = 1:15, reduction = "pca", ncol = 3) DimHeatmap(obj, dims = 1:15, cells = 500, balanced = TRUE) obj <- JackStraw(obj, num.replicate = 100) obj <- ScoreJackStraw(obj, dims = 1:20) JackStrawPlot(obj, dims = 1:15) ElbowPlot(obj) ############### slide 13 ################# obj <- FindNeighbors(obj, dims = 1:10) obj <- FindClusters(obj, resolution = 0.5) obj@meta.data[1:10,] obj <- RunUMAP(obj, dims = 1:10) DimPlot(obj, reduction = "umap") cluster3.markers <- FindMarkers(obj, ident.1 = 3, min.pct = 0.25) head(cluster3.markers, n = 5) obj.markers <- FindAllMarkers(obj, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) obj.markers %>% group_by(cluster) %>% slice_max(n = 2, order_by = avg_log2FC) ############### slide 14 ################# VlnPlot(obj, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", "CD8A"), group.by = "seurat_clusters", ncol = 3) FeaturePlot(obj, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP", "CD8A")) DotPlot(obj, features = c("PPBP", "FCER1A", "GNLY", "FCGR3A", "CD8A", "MS4A1", "CD3E", "CD14", "LYZ"), group.by = "seurat_clusters") obj.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10 DoHeatmap(obj, features = top10$gene) + NoLegend() ############### slide 15 ################# new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC", "Platelet") names(new.cluster.ids) <- levels(obj) obj <- RenameIdents(obj, new.cluster.ids) Idents(obj) DimPlot(obj, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() ############### slide 16 ################# obj2 <- obj levels(obj2@active.ident) <- c(levels(obj2@active.ident),"T + NK") obj2@active.ident[obj2@active.ident=='Naive CD4 T']='T + NK' obj2@active.ident[obj2@active.ident=='Memory CD4 T']='T + NK' obj2@active.ident[obj2@active.ident=='CD8 T']='T + NK' obj2@active.ident[obj2@active.ident=='NK']='T + NK' obj2@active.ident <- droplevels(obj2@active.ident) levels(obj2@active.ident) levels(obj2@active.ident) <- c(levels(obj2@active.ident),"Mono + DC") obj2@active.ident[obj2@active.ident=='FCGR3A+ Mono']='Mono + DC' obj2@active.ident[obj2@active.ident=='CD14+ Mono']='Mono + DC' obj2@active.ident[obj2@active.ident=='DC']='Mono + DC' obj2@active.ident <- droplevels(obj2@active.ident) levels(obj2@active.ident) obj2.markers <- FindAllMarkers(obj2, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) obj2.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10 DoHeatmap(obj2, features = top10$gene) + NoLegend() ############### slide 17 ################# obj3 <- subset(obj, subset = seurat_clusters==1 | seurat_clusters==5 | seurat_clusters==7 ) obj3.markers <- FindAllMarkers(obj3, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) obj3.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10 DoHeatmap(obj3, features = top10$gene) + NoLegend() ############### slide 18 ################# obj4 <- subset(obj, subset = seurat_clusters==0 | seurat_clusters==2 | seurat_clusters==4 | seurat_clusters==6) levels(obj4@active.ident) <- c(levels(obj4@active.ident),"CD4 T") obj4@active.ident[obj4@active.ident=='Naive CD4 T']='CD4 T' obj4@active.ident[obj4@active.ident=='Memory CD4 T']='CD4 T' levels(obj4@active.ident) <- c(levels(obj4@active.ident),"CD8 + NK") obj4@active.ident[obj4@active.ident=='CD8 T']='CD8 + NK' obj4@active.ident[obj4@active.ident=='NK']='CD8 + NK' obj4@active.ident <- droplevels(obj4@active.ident) levels(obj4@active.ident) obj4.markers <- FindAllMarkers(obj4, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) obj4.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10 DoHeatmap(obj4, features = top10$gene) + NoLegend() ############### slide 19 ################# obj5 <- subset(obj, subset = seurat_clusters==4 | seurat_clusters==6) obj5.markers <- FindAllMarkers(obj5, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) obj5.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10 DoHeatmap(obj5, features = top10$gene) + NoLegend() obj6 <- subset(obj, subset = seurat_clusters==0 | seurat_clusters==2) obj6.markers <- FindAllMarkers(obj6, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) obj6.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC) -> top10 DoHeatmap(obj6, features = top10$gene) + NoLegend()