#单细胞亚群的生物学合理命名讨论rm(list = ls())library(Seurat)library(SeuratData)library(ggplot2)library(patchwork)library(dplyr)load(file = 'basic.sce.pbmc.Rdata')levels(Idents(pbmc))# 首先提取T细胞子集DimPlot(pbmc, reduction = 'umap', label = TRUE, pt.size = 0.5) + NoLegend()FeaturePlot(pbmc, features = c("CD3D","CD3E")) #T细胞常用CD3来分sce=pbmctable(Idents(sce))#取子集t_sce = sce[, Idents(sce) %in% c('CD8 T')]#然后进行标准的降维聚类分群#代码不要变动#降维聚类分群sce=t_sce#归一化sce <- NormalizeData(sce, normalization.method = "LogNormalize", scale.factor = 1e4) #Find variable featuressce <- FindVariableFeatures(sce, selection.method = 'vst', nfeatures = 2000)#Scale and center the datasce <- ScaleData(sce, vars.to.regress = "percent.mt")#pcasce <- RunPCA(sce, features = VariableFeatures(object = sce)) #计算给定数据集的k.param近邻sce <- FindNeighbors(sce, dims = 1:10)#聚类sce <- FindClusters(sce, resolution = 1 )# Look at cluster IDs of the first 5 cellshead(Idents(sce), 5)table(sce$seurat_clusters) sce <- RunUMAP(sce, dims = 1:10)DimPlot(sce, reduction = 'umap')#为数据集中的每个标识类查找markers(差异表达基因)sce.markers <- FindAllMarkers(object = sce, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)DT::datatable(sce.markers)# write.csv(sce.markers,file=paste0(pro,'_sce.markers.csv'))save(sce.markers, file = 'sce.markers-for-cd8-subsets.Rdata')library(dplyr) #在seurat4里面,是 avg_log2FC,是V3版本是avg_logFCtop10 <- sce.markers %>% group_by(cluster) %>% top_n(5, avg_log2FC)DoHeatmap(sce, top10$gene, size=3) p <- DotPlot(sce, features = unique(top10$gene), assay='RNA' ) + coord_flip()pggsave('check-top5-for-cd8-subsets.pdf' )# cytotoxicity (GZMB, PRF1), 0 # naive (LEF1, SELL, TCF7), 1 # LTB T cells , 2# RPl/s # 参考: https://mp.weixin.qq.com/s/1YjoX8lTIB0e2vV0Eqey2Q marker_genes= c("MKI67","TOP2A",'TNFRSF9','MX1', "SELL","IL7R","CD40LG","ANXA1","FOS")p <- DotPlot(sce, features = marker_genes, assay='RNA' ) + coord_flip() + theme(axis.text.x = element_text(angle = 90))pggsave('check_g1_markers_by_Tcell-SubType.pdf')marker_genes= c("MKI67","TOP2A",'TNFRSF9','MX1')FeaturePlot(pbmc, features = marker_genes)