Q4：细胞亚群等比例抽样 - 《scRNA》

细胞亚群等比例抽样在做 GSVA、细胞通讯、转录因子分析、拟时序分析、inferCNV（肿瘤拷贝数推断）的时候会有用。

# Equal-size sampling of cells from each cluster.
# NOTE(review): rm(list = ls()) removes every object from the calling
# environment before the script starts.
rm(list = ls())
library(Seurat)
# devtools::install_github('satijalab/seurat-data')
library(SeuratData)
library(ggplot2)
library(patchwork)
library(dplyr)

# Load the saved workspace file; it is expected to supply the `pbmc`
# object used below.
load(file = 'basic.sce.pbmc.Rdata')

# UMAP of all cells with labels drawn on the plot and the legend removed.
DimPlot(
  pbmc,
  reduction = 'umap',
  label = TRUE,
  pt.size = 0.5
) + NoLegend()

sce <- pbmc

# Marker genes shown in the heatmaps.
features <- c(
  'IL7R', 'CCR7', 'CD14', 'LYZ', 'IL7R', 'S100A4', "MS4A1", "CD8A", 'FOXP3',
  'FCGR3A', 'MS4A7', 'GNLY', 'NKG7',
  'FCER1A', 'CST3', 'PPBP'
)

# Heatmap over the whole object; subset() is called without any filter.
DoHeatmap(subset(sce), features = features, size = 3)

# Number of cells in each identity class.
table(Idents(sce))

各细胞亚群的细胞数量不一致的话，热图中有些较小的细胞亚群就几乎看不到了，所以可能需要按亚群等比例抽取细胞。

# Heatmap after downsampling with downsample = 15
# (presumably a per-identity-class cap on the number of cells; confirm
# against the Seurat subset() documentation).
DoHeatmap(
  subset(sce, downsample = 15),
  size = 3,
  features = features
)
# Downsampling like this is useful before GSVA, cell-cell communication,
# transcription-factor, pseudotime and inferCNV (tumor copy number) analyses.
# Real projects can contain 100,000+ cells.

图：每个细胞亚群抽取15个细胞

# Draw up to 10 cells from every cluster.
allCells <- names(Idents(sce)) # names of all cells
allType <- levels(Idents(sce)) # all cluster identities

# For each cluster, pick cells (not genes) at random. The sample size is
# capped at the cluster size, so a cluster with fewer than 10 cells
# contributes all of its cells instead of making sample() stop with an error.
# Call set.seed() beforehand if the draw needs to be reproducible.
choose_Cells <- unlist(lapply(allType, function(x) {
  cgCells <- allCells[Idents(sce) == x]
  sample(cgCells, min(10, length(cgCells)))
}))


# Keep only the sampled cells.
# Expected for this dataset: 9 clusters x 10 cells each = 90 cells.
cg_sce <- sce[, allCells %in% choose_Cells]

# Print the reduced object, then the per-cluster cell counts as a data frame.
cg_sce
as.data.frame(table(Idents(cg_sce)))

# Heatmap of the expression values stored in the 'data' slot.
# NOTE(review): this plots the full `sce` via a filter-less subset(); the
# sampled object `cg_sce` built above is not used here -- confirm intent.
DoHeatmap(
  subset(sce),
  slot = 'data',
  size = 3,
  features = features
)

图：原始表达量（slot = 'data'）