需要的数据:

  • 基因表达矩阵:行是基因,列是细胞
  • 细胞分组信息:一共一列,行名为细胞,第一列名字为Cell.type,代表分组信息

依赖的R包

  • GSEAbase
  • limma
  • dplyr
  • tidyr
  • GSVA
  • pystr (github)
  • purrr
  • tibble
  • pheatmap

代码

  1. library(GSEABase)
  2. library(Biobase)
  3. library(genefilter)
  4. library(limma)
  5. library(RColorBrewer)
  6. library(GSVA)
  7. library(dplyr)
  8. library(pystr)
  9. ##这里可以任意切换geneset,只要你有
  10. geneSets <- getGmt("/dellfsqd2/ST_LBI/USER/panxiaoguang/SingleCell/liangxue_data/cluster_1/GSVA/DataBase/c3.tft.v7.0.symbols.gmt")
  11. re_grp<-function(x,s){
  12. purrr::map_chr(x,function(haha){if (haha==s){"A"}else{"B"}})
  13. }
  14. ###该函数x为分组编号,或者是细胞类型,或者是时间序列,cellinfo为细胞分组(行名为细胞,Cell.type为分组)
  15. ###num为想要提取的top n基因或者pathway,依赖dplyr,limma
  16. cal_Diff<-function(x,cellInfo,res_es,num){
  17. need<-cellInfo%>%transmute(new_group=re_grp(Cell.type,x))%>%.$new_group
  18. grouP<-as.factor(need)
  19. desigN<-model.matrix(~grouP+0)
  20. rownames(desigN)<-rownames(cellInfo)
  21. comparE<-makeContrasts(grouPA-grouPB,levels=desigN)
  22. fiT <- lmFit(res_es, desigN)
  23. fiT2 <- contrasts.fit(fiT, comparE)
  24. fiT3 <- eBayes(fiT2)
  25. Diff<-topTable(fiT3,p.value=0.05,coef=1,num=num)
  26. if (nrow(Diff)>0){
  27. Diff$cluster<-x
  28. Diff<-Diff%>%tibble::rownames_to_column(var="Name")%>%tbl_df()%>%select(Name,logFC,t,P.Value,adj.P.Val,cluster)
  29. }else{
  30. Diff<-tibble::tibble(Name=NA,logFC=NA,t=NA,P.Value=NA,adj.P.Val=NA,cluster=NA)
  31. }
  32. }
  33. ###该函数mat输入表达矩阵,info输入分组信息(行名为细胞,Cell.type为分组信息),需要预先载入dplyr
  34. Average_express<-function(mat,info){
  35. new_res<-mat%>%tibble::rownames_to_column(var="Name")%>%tidyr::gather(cell,value,-Name)
  36. new_cellinfo<-info
  37. new_cellinfo<-tibble::rownames_to_column(new_cellinfo,var="cell")
  38. all<-left_join(new_res,new_cellinfo,by="cell")
  39. fuck<-all%>%group_by(Name,Cell.type)%>%summarise(Mean=mean(value))%>%tidyr::spread(Cell.type,Mean)
  40. fuck
  41. }
  42. for(i in c("PF","PO3","PO7","PO11")){
  43. expMat<- read.table(pystr_format("cluster_1.exprMat_{1}.txt",i),sep="\t",header=TRUE,row.names=1,check.names=FALSE,stringsAsFactors=FALSE)
  44. cellInfo<-read.table(pystr_format("cluster_1.cellInfo_{1}.txt",i),sep="\t",header=TRUE,row.names=1,check.names=FALSE,stringsAsFactors=FALSE)
  45. mydata<-as.matrix(expMat)
  46. res_es <- gsva(mydata, geneSets, min.sz=10, max.sz=500, verbose=FALSE, parallel.sz=8)
  47. res_es<-as.data.frame(res_es)
  48. write.table(res_es,pystr_format("./cell_diff/cluster_1_{1}_c3.txt",i),sep="\t",quote=FALSE)
  49. ##差异分析
  50. ##该函数用来分组,x为一个列表,s为输入的分组信息
  51. ##计算差异基因集的合集
  52. #fin<-unique(unlist(lapply(list(0,1,2,3),cal_Diff,cellInfo=cellInfo,res_es=res_es,num=10)))
  53. ##这里要明确细胞分类的个数,多了会报错
  54. fin<-do.call('rbind',lapply(list(0,1,2,3),cal_Diff,cellInfo=cellInfo,res_es=res_es,num=10))
  55. fin<-fin%>%filter(!is.na(cluster))
  56. write.table(fin,pystr_format("./cell_diff/cluster_1_{1}_c3_diff_geneset.txt",i),sep="\t",quote=FALSE,row.names=FALSE)
  57. ###计算平均表达量矩阵
  58. heihei2<-Average_express(res_es,cellInfo)
  59. heihei_need<-filter(heihei2,Name%in%unique(fin$Name))
  60. houhou2<-tibble::column_to_rownames(heihei_need,var="Name")
  61. ###画热图
  62. pheatmap::pheatmap(houhou2,file=pystr_format("./cell_diff/cluster_1_{1}_c3_diff_heat.pdf",i))
  63. }