使用limma批量计算多组之间的差异（单细胞数据） - 《生物信息分析》

需要的数据：
依赖的R包
代码

需要的数据：

基因表达矩阵：行是基因，列是细胞
细胞分组信息：一共一列，行名为细胞，第一列名字为Cell.type,代表分组信息

依赖的R包

GSEAbase
limma
dplyr
tidyr
GSVA
pystr (github)
purrr
tibble
pheatmap

代码

library(GSEABase)
library(Biobase)
library(genefilter)
library(limma)
library(RColorBrewer)
library(GSVA)
library(dplyr)
library(pystr)
##这里可以任意切换geneset，只要你有
geneSets <- getGmt("/dellfsqd2/ST_LBI/USER/panxiaoguang/SingleCell/liangxue_data/cluster_1/GSVA/DataBase/c3.tft.v7.0.symbols.gmt")
re_grp<-function(x,s){
    purrr::map_chr(x,function(haha){if (haha==s){"A"}else{"B"}})
    }
###该函数x为分组编号，或者是细胞类型，或者是时间序列，cellinfo为细胞分组(行名为细胞，Cell.type为分组)
###num为想要提取的top n基因或者pathway，依赖dplyr,limma
cal_Diff<-function(x,cellInfo,res_es,num){
    need<-cellInfo%>%transmute(new_group=re_grp(Cell.type,x))%>%.$new_group
    grouP<-as.factor(need)
    desigN<-model.matrix(~grouP+0)
    rownames(desigN)<-rownames(cellInfo)
    comparE<-makeContrasts(grouPA-grouPB,levels=desigN)
    fiT <- lmFit(res_es, desigN)
    fiT2 <- contrasts.fit(fiT, comparE)
    fiT3 <- eBayes(fiT2)
    Diff<-topTable(fiT3,p.value=0.05,coef=1,num=num)
    if (nrow(Diff)>0){
        Diff$cluster<-x
        Diff<-Diff%>%tibble::rownames_to_column(var="Name")%>%tbl_df()%>%select(Name,logFC,t,P.Value,adj.P.Val,cluster)
    }else{
        Diff<-tibble::tibble(Name=NA,logFC=NA,t=NA,P.Value=NA,adj.P.Val=NA,cluster=NA)
    }
}
###该函数mat输入表达矩阵，info输入分组信息（行名为细胞，Cell.type为分组信息),需要预先载入dplyr
Average_express<-function(mat,info){
    new_res<-mat%>%tibble::rownames_to_column(var="Name")%>%tidyr::gather(cell,value,-Name)
    new_cellinfo<-info 
    new_cellinfo<-tibble::rownames_to_column(new_cellinfo,var="cell")
    all<-left_join(new_res,new_cellinfo,by="cell")
    fuck<-all%>%group_by(Name,Cell.type)%>%summarise(Mean=mean(value))%>%tidyr::spread(Cell.type,Mean)
    fuck
}
for(i in c("PF","PO3","PO7","PO11")){
    expMat<- read.table(pystr_format("cluster_1.exprMat_{1}.txt",i),sep="\t",header=TRUE,row.names=1,check.names=FALSE,stringsAsFactors=FALSE)
    cellInfo<-read.table(pystr_format("cluster_1.cellInfo_{1}.txt",i),sep="\t",header=TRUE,row.names=1,check.names=FALSE,stringsAsFactors=FALSE)
    mydata<-as.matrix(expMat)
    res_es <- gsva(mydata, geneSets, min.sz=10, max.sz=500, verbose=FALSE, parallel.sz=8)
    res_es<-as.data.frame(res_es)
    write.table(res_es,pystr_format("./cell_diff/cluster_1_{1}_c3.txt",i),sep="\t",quote=FALSE)
    ##差异分析
    ##该函数用来分组，x为一个列表，s为输入的分组信息
    ##计算差异基因集的合集
    #fin<-unique(unlist(lapply(list(0,1,2,3),cal_Diff,cellInfo=cellInfo,res_es=res_es,num=10)))
    ##这里要明确细胞分类的个数，多了会报错
    fin<-do.call('rbind',lapply(list(0,1,2,3),cal_Diff,cellInfo=cellInfo,res_es=res_es,num=10))
    fin<-fin%>%filter(!is.na(cluster))
    write.table(fin,pystr_format("./cell_diff/cluster_1_{1}_c3_diff_geneset.txt",i),sep="\t",quote=FALSE,row.names=FALSE)
    ###计算平均表达量矩阵
    heihei2<-Average_express(res_es,cellInfo)
    heihei_need<-filter(heihei2,Name%in%unique(fin$Name))
    houhou2<-tibble::column_to_rownames(heihei_need,var="Name") 
    ###画热图
    pheatmap::pheatmap(houhou2,file=pystr_format("./cell_diff/cluster_1_{1}_c3_diff_heat.pdf",i))
}