Seurat官网3.1版本的页面上已经找不到该指南的入口链接,但页面本身仍然可以访问,网址:
https://satijalab.org/seurat/v3.1/interaction_vignette.html

载入数据

下面演示了一些与Seurat对象进行交互的有用功能。出于演示目的,我们将使用在第一个指导教程中创建的2700 PBMC对象。您可以在此处下载预先计算的对象。为了模拟有两个重复的情况,我们将细胞随机分成两组,约一半标记为“rep1”,另一半标记为“rep2”。

  1. library(Seurat)
  2. pbmc <- readRDS(file = "../data/pbmc3k_final.rds")
  3. # 随机设置两个重复
  4. set.seed(42)
  5. pbmc$replicate <- sample(c("rep1", "rep2"), size = ncol(pbmc), replace = TRUE)

在细胞聚类和样本重复之间切换Idents

  1. # 默认按当前的细胞分类(储存在object@ident中)着色绘图
  2. DimPlot(pbmc, reduction = "umap")

Seurat3.1的灵活操作指南 - 图1

  1. # 把细胞分类因子先储存到对象中
  2. pbmc$CellType <- Idents(pbmc)
  3. # 切换Idents
  4. Idents(pbmc) <- "replicate"
  5. DimPlot(pbmc, reduction = "umap")

Seurat3.1的灵活操作指南 - 图2

  1. # alternately : DimPlot(pbmc, reduction = 'umap', group.by = 'replicate') you can pass the
  2. # shape.by to label points by both replicate and cell type
  3. # Switch back to cell type labels
  4. Idents(pbmc) <- "CellType"

分别或同时统计不同聚类或者不同样本来源的细胞数目

  1. # 每个聚类包含多少细胞?
  2. table(Idents(pbmc))
  1. ##
  2. ## Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T
  3. ## 697 483 480 344 271
  4. ## FCGR3A+ Mono NK DC Platelet
  5. ## 162 155 32 14
  1. # 每组重复包含多少细胞?
  2. table(pbmc$replicate)
  1. ##
  2. ## rep1 rep2
  3. ## 1348 1290
  1. # 每个聚类细胞数占比
  2. prop.table(table(Idents(pbmc)))
  1. ##
  2. ## Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T
  3. ## 0.264215315 0.183093252 0.181956027 0.130401820 0.102729340
  4. ## FCGR3A+ Mono NK DC Platelet
  5. ## 0.061410159 0.058756634 0.012130402 0.005307051
  1. # 样本分组和细胞聚类一起统计
  2. table(Idents(pbmc), pbmc$replicate)
  1. ##
  2. ## rep1 rep2
  3. ## Naive CD4 T 354 343
  4. ## Memory CD4 T 249 234
  5. ## CD14+ Mono 232 248
  6. ## B 173 171
  7. ## CD8 T 154 117
  8. ## FCGR3A+ Mono 81 81
  9. ## NK 81 74
  10. ## DC 18 14
  11. ## Platelet 6 8
  1. prop.table(table(Idents(pbmc), pbmc$replicate), margin = 2)
  1. ##
  2. ## rep1 rep2
  3. ## Naive CD4 T 0.262611276 0.265891473
  4. ## Memory CD4 T 0.184718101 0.181395349
  5. ## CD14+ Mono 0.172106825 0.192248062
  6. ## B 0.128338279 0.132558140
  7. ## CD8 T 0.114243323 0.090697674
  8. ## FCGR3A+ Mono 0.060089021 0.062790698
  9. ## NK 0.060089021 0.057364341
  10. ## DC 0.013353116 0.010852713
  11. ## Platelet 0.004451039 0.006201550

提取特定的Seurat子集做亚型分析

  1. # What are the cell names of all NK cells?
  2. WhichCells(pbmc, idents = "NK")
  1. ## [1] "AAACCGTGTATGCG" "AAATTCGATTCTCA" "AACCTTACGCGAGA" "AACGCCCTCGTACA"
  2. ## [5] "AACGTCGAGTATCG" "AAGATTACCTCAAG" "AAGCAAGAGCTTAG" "AAGCAAGAGGTGTT"
  3. ## [9] "AAGTAGGATACAGC" "AATACTGAATTGGC" "AATCCTTGGTGAGG" "AATCTCTGCTTTAC"
  4. ## [13] "ACAAATTGTTGCGA" "ACAACCGAGGGATG" "ACAATTGATGACTG" "ACACCCTGGTGTTG"
  5. ## [17] "ACAGGTACTGGTGT" "ACCTGGCTAAGTAG" "ACGAACACCTTGTT" "ACGATCGAGGACTT"
  6. ## [21] "ACGCAATGGTTCAG" "ACGCTGCTGTTCTT" "ACGGAACTCAGATC" "ACGTGATGTGACAC"
  7. ## [25] "ACGTTGGAGCCAAT" "ACTGCCACTCCGTC" "ACTGGCCTTCAGTG" "ACTTCAACGTAGGG"
  8. ## [29] "AGAACAGAAATGCC" "AGATATACCCGTAA" "AGATTCCTGTTCAG" "AGCCTCTGCCAATG"
  9. ## [33] "AGCGATTGAGATCC" "AGGATGCTTTAGGC" "AGGGACGAGTCAAC" "AGTAATACATCACG"
  10. ## [37] "AGTCACGATGAGCT" "AGTTTGCTACTGGT" "ATACCACTGCCAAT" "ATACTCTGGTATGC"
  11. ## [41] "ATCCCGTGCAGTCA" "ATCTTTCTTGTCCC" "ATGAAGGACTTGCC" "ATGATAACTTCACT"
  12. ## [45] "ATGATATGGTGCTA" "ATGGACACGCATCA" "ATGGGTACATCGGT" "ATTAACGATGAGAA"
  13. ## [49] "ATTCCAACTTAGGC" "CAAGGTTGTCTGGA" "CAATCTACTGACTG" "CACCACTGGCGAAG"
  14. ## [53] "CACGGGTGGAGGAC" "CAGATGACATTCTC" "CAGCAATGGAGGGT" "CAGCGGACCTTTAC"
  15. ## [57] "CAGCTCTGTGTGGT" "CAGTTTACACACGT" "CATCAGGACTTCCG" "CATCAGGATAGCCA"
  16. ## [61] "CATGAGACGTTGAC" "CATTACACCAACTG" "CATTTCGAGATACC" "CCTCGAACACTTTC"
  17. ## [65] "CGACCACTAAAGTG" "CGACCACTGCCAAT" "CGAGGCTGACGCTA" "CGCCGAGAGCTTAG"
  18. ## [69] "CGGCGAACGACAAA" "CGGCGAACTACTTC" "CGGGCATGTCTCTA" "CGTACCTGGCATCA"
  19. ## [73] "CGTGTAGACGATAC" "CGTGTAGAGTTACG" "CGTGTAGATTCGGA" "CTAAACCTCTGACA"
  20. ## [77] "CTAACGGAACCGAT" "CTACGCACTGGTCA" "CTACTCCTATGTCG" "CTAGTTACGAAACA"
  21. ## [81] "CTATACTGCTACGA" "CTATACTGTCTCAT" "CTCGACTGGTTGAC" "CTGAGAACGTAAAG"
  22. ## [85] "CTTTAGTGACGGGA" "GAACCAACTTCCGC" "GAAGTGCTAAACGA" "GAATGCACCTTCGC"
  23. ## [89] "GAATTAACGTCGTA" "GACGGCACACGGGA" "GAGCGCTGAAGATG" "GAGGTACTGACACT"
  24. ## [93] "GAGGTGGATCCTCG" "GATAGAGAAGGGTG" "GATCCCTGACCTTT" "GCACACCTGTGCTA"
  25. ## [97] "GCACCACTTCCTTA" "GCACTAGAGTCGTA" "GCAGGGCTATCGAC" "GCCGGAACGTTCTT"
  26. ## [101] "GCCTACACAGTTCG" "GCGCATCTTGCTCC" "GCGCGATGGTGCAT" "GGAAGGTGGCGAGA"
  27. ## [105] "GGACGCTGTCCTCG" "GGAGGCCTCGTTGA" "GGCAAGGAAAAAGC" "GGCATATGCTTATC"
  28. ## [109] "GGCCGAACTCTAGG" "GGCTAAACACCTGA" "GGGTTAACGTGCAT" "GGTGGAGAAACGGG"
  29. ## [113] "GTAGTGTGAGCGGA" "GTCGACCTGAATGA" "GTGATTCTGGTTCA" "GTGTATCTAGTAGA"
  30. ## [117] "GTTAAAACCGAGAG" "GTTCAACTGGGACA" "GTTGACGATATCGG" "TAACTCACTCTACT"
  31. ## [121] "TAAGAGGACTTGTT" "TAATGCCTCGTCTC" "TACGGCCTGGGACA" "TACTACTGATGTCG"
  32. ## [125] "TACTCTGAATCGAC" "TACTGTTGAGGCGA" "TAGCATCTCAGCTA" "TAGCCCACAGCTAC"
  33. ## [129] "TAGGGACTGAACTC" "TAGTGGTGAAGTGA" "TAGTTAGAACCACA" "TATGAATGGAGGAC"
  34. ## [133] "TATGGGTGCATCAG" "TATTTCCTGGAGGT" "TCAACACTGTTTGG" "TCAGACGACGTTAG"
  35. ## [137] "TCCCGAACACAGTC" "TCCTAAACCGCATA" "TCGATTTGCAGCTA" "TCTAACACCAGTTG"
  36. ## [141] "TGATAAACTCCGTC" "TGCACAGACGACAT" "TGCCACTGCGATAC" "TGCTGAGAGAGCAG"
  37. ## [145] "TGGAACACAAACAG" "TGGTAGACCCTCAC" "TGTAATGACACAAC" "TGTAATGAGGTAAA"
  38. ## [149] "TTACTCGATCTACT" "TTAGTCTGCCAACA" "TTCCAAACTCCCAC" "TTCCCACTTGAGGG"
  39. ## [153] "TTCTAGTGGAGAGC" "TTCTGATGGAGACG" "TTGTCATGGACGGA"
  1. # 提取NK细胞的表达矩阵
  2. nk.raw.data <- as.matrix(GetAssayData(pbmc, slot = "counts")[, WhichCells(pbmc, ident = "NK")])
  3. # 根据基因表达量筛选细胞:提取MS4A1表达量大于1的细胞子集
  4. subset(pbmc, subset = MS4A1 > 1)
  1. ## An object of class Seurat
  2. ## 13714 features across 414 samples within 1 assay
  3. ## Active assay: RNA (13714 features)
  4. ## 2 dimensional reductions calculated: pca, umap
  1. subset(pbmc, subset = replicate == "rep2")
  1. ## An object of class Seurat
  2. ## 13714 features across 1290 samples within 1 assay
  3. ## Active assay: RNA (13714 features)
  4. ## 2 dimensional reductions calculated: pca, umap
  1. # 选择两个细胞类型
  2. subset(pbmc, idents = c("NK", "B"))
  1. ## An object of class Seurat
  2. ## 13714 features across 499 samples within 1 assay
  3. ## Active assay: RNA (13714 features)
  4. ## 2 dimensional reductions calculated: pca, umap
  1. # 排除掉某些细胞类型
  2. subset(pbmc, idents = c("NK", "B"), invert = TRUE)
  1. ## An object of class Seurat
  2. ## 13714 features across 2139 samples within 1 assay
  3. ## Active assay: RNA (13714 features)
  4. ## 2 dimensional reductions calculated: pca, umap
  1. # note that if you wish to perform additional rounds of clustering after subsetting we recommend
  2. # re-running FindVariableFeatures() and ScaleData()

计算基因的平均表达量

  1. # 计算平均表达量
  2. cluster.averages <- AverageExpression(pbmc)
  3. head(cluster.averages[["RNA"]][, 1:5])
Naive CD4 T Memory CD4 T CD14+ Mono B CD8 T
AL627309.1 0.0061287 0.0059273 0.0485434 0.0000000 0.0205459
AP006222.2 0.0000000 0.0082061 0.0108847 0.0000000 0.0119149
RP11-206L10.2 0.0074531 0.0000000 0.0000000 0.0206503 0.0000000
RP11-206L10.9 0.0000000 0.0000000 0.0105012 0.0000000 0.0000000
LINC00115 0.0191189 0.0246905 0.0375374 0.0388854 0.0194828
NOC2L 0.4974632 0.3598115 0.2725375 0.5865349 0.5570490
  1. # 返回Seurat对象用于下游分析
  2. orig.levels <- levels(pbmc)
  3. Idents(pbmc) <- gsub(pattern = " ", replacement = "_", x = Idents(pbmc))
  4. orig.levels <- gsub(pattern = " ", replacement = "_", x = orig.levels)
  5. levels(pbmc) <- orig.levels
  6. cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE)
  7. cluster.averages
  1. ## An object of class Seurat
  2. ## 13714 features across 9 samples within 1 assay
  3. ## Active assay: RNA (13714 features)
  1. # How can I plot the average expression of NK cells vs. CD8 T cells? Pass do.hover = T for an
  2. # interactive plot to identify gene outliers
  3. CellScatter(cluster.averages, cell1 = "NK", cell2 = "CD8_T")

Seurat3.1的灵活操作指南 - 图3

  1. # How can I calculate expression averages separately for each replicate?
  2. cluster.averages <- AverageExpression(pbmc, return.seurat = TRUE, add.ident = "replicate")
  3. CellScatter(cluster.averages, cell1 = "CD8_T_rep1", cell2 = "CD8_T_rep2")

Seurat3.1的灵活操作指南 - 图4

  1. # You can also plot heatmaps of these 'in silico' bulk datasets to visualize agreement between
  2. # replicates
  3. DoHeatmap(cluster.averages, features = unlist(TopFeatures(pbmc[["pca"]], balanced = TRUE)), size = 3,
  4. draw.lines = FALSE)

Seurat3.1的灵活操作指南 - 图5