需要的数据:
- 基因表达矩阵:行是基因,列是细胞
- 细胞分组信息:一共一列,行名为细胞,第一列名字为Cell.type,代表分组信息
依赖的R包
- GSEAbase
- limma
- dplyr
- tidyr
- GSVA
- pystr (github)
- purrr
- tibble
- pheatmap
代码
- library(GSEABase)
- library(Biobase)
- library(genefilter)
- library(limma)
- library(RColorBrewer)
- library(GSVA)
- library(dplyr)
- library(pystr)
- ##这里可以任意切换geneset,只要你有
- geneSets <- getGmt("/dellfsqd2/ST_LBI/USER/panxiaoguang/SingleCell/liangxue_data/cluster_1/GSVA/DataBase/c3.tft.v7.0.symbols.gmt")
- re_grp<-function(x,s){
-     purrr::map_chr(x,function(haha){if (haha==s){"A"}else{"B"}})
-     }
- ###该函数x为分组编号,或者是细胞类型,或者是时间序列,cellinfo为细胞分组(行名为细胞,Cell.type为分组)
- ###num为想要提取的top n基因或者pathway,依赖dplyr,limma
- cal_Diff<-function(x,cellInfo,res_es,num){
-     need<-cellInfo%>%transmute(new_group=re_grp(Cell.type,x))%>%.$new_group
-     grouP<-as.factor(need)
-     desigN<-model.matrix(~grouP+0)
-     rownames(desigN)<-rownames(cellInfo)
-     comparE<-makeContrasts(grouPA-grouPB,levels=desigN)
-     fiT <- lmFit(res_es, desigN)
-     fiT2 <- contrasts.fit(fiT, comparE)
-     fiT3 <- eBayes(fiT2)
-     Diff<-topTable(fiT3,p.value=0.05,coef=1,num=num)
-     if (nrow(Diff)>0){
-         Diff$cluster<-x
-         Diff<-Diff%>%tibble::rownames_to_column(var="Name")%>%tbl_df()%>%select(Name,logFC,t,P.Value,adj.P.Val,cluster)
-     }else{
-         Diff<-tibble::tibble(Name=NA,logFC=NA,t=NA,P.Value=NA,adj.P.Val=NA,cluster=NA)
-     }
- }
- ###该函数mat输入表达矩阵,info输入分组信息(行名为细胞,Cell.type为分组信息),需要预先载入dplyr
- Average_express<-function(mat,info){
-     new_res<-mat%>%tibble::rownames_to_column(var="Name")%>%tidyr::gather(cell,value,-Name)
-     new_cellinfo<-info 
-     new_cellinfo<-tibble::rownames_to_column(new_cellinfo,var="cell")
-     all<-left_join(new_res,new_cellinfo,by="cell")
-     fuck<-all%>%group_by(Name,Cell.type)%>%summarise(Mean=mean(value))%>%tidyr::spread(Cell.type,Mean)
-     fuck
- }
- for(i in c("PF","PO3","PO7","PO11")){
-     expMat<- read.table(pystr_format("cluster_1.exprMat_{1}.txt",i),sep="\t",header=TRUE,row.names=1,check.names=FALSE,stringsAsFactors=FALSE)
-     cellInfo<-read.table(pystr_format("cluster_1.cellInfo_{1}.txt",i),sep="\t",header=TRUE,row.names=1,check.names=FALSE,stringsAsFactors=FALSE)
-     mydata<-as.matrix(expMat)
-     res_es <- gsva(mydata, geneSets, min.sz=10, max.sz=500, verbose=FALSE, parallel.sz=8)
-     res_es<-as.data.frame(res_es)
-     write.table(res_es,pystr_format("./cell_diff/cluster_1_{1}_c3.txt",i),sep="\t",quote=FALSE)
-     ##差异分析
-     ##该函数用来分组,x为一个列表,s为输入的分组信息
-     ##计算差异基因集的合集
-     #fin<-unique(unlist(lapply(list(0,1,2,3),cal_Diff,cellInfo=cellInfo,res_es=res_es,num=10)))
-     ##这里要明确细胞分类的个数,多了会报错
-     fin<-do.call('rbind',lapply(list(0,1,2,3),cal_Diff,cellInfo=cellInfo,res_es=res_es,num=10))
-     fin<-fin%>%filter(!is.na(cluster))
-     write.table(fin,pystr_format("./cell_diff/cluster_1_{1}_c3_diff_geneset.txt",i),sep="\t",quote=FALSE,row.names=FALSE)
-     ###计算平均表达量矩阵
-     heihei2<-Average_express(res_es,cellInfo)
-     heihei_need<-filter(heihei2,Name%in%unique(fin$Name))
-     houhou2<-tibble::column_to_rownames(heihei_need,var="Name") 
-     ###画热图
-     pheatmap::pheatmap(houhou2,file=pystr_format("./cell_diff/cluster_1_{1}_c3_diff_heat.pdf",i))
- }