ssGSEA

免疫基因集来自:

Pan-cancer Immunogenomic Analyses Reveal Genotype-Immunophenotype Relationships and Predictors of Response to Checkpoint Blockade - ScienceDirect

从这篇文章里下载Table S6. List of Pan-cancer Immune Metagenes,另存为Immune_infiltraton_ggGSEA.csv

由于ssgsea是用预先定义的基因集去计算每组打分,所以基因集是可以自己定义的

关于输入数据格式

We calculate now GSV A enrichment scores for these gene sets using first the microarray data and then the RNA-seq integer count data. Note that the only requirement to do the latter is to set the argument kcdf=“Poisson” which is “Gaussian” by default. Note, however, that if our RNA-seq derived expression levels would be continous, such as log-CPMs, log-RPKMs or log-TPMs, the the default value of the kcdf argument should remain unchanged.

如果是芯片数据,log2即可用;如果是转录组数据,则count,RPKM,FPKM,TPM皆可用,使用count时须指明参数kcdf=“Poisson”。

代码:

  1. library(GSEABase)
  2. library(GSVA)
  3. library(pheatmap)
  4. load('exp_group.Rdata') # exp为表达矩阵,group是分组
  5. geneset = read.csv('Immune_infiltration_ggGSEA.csv') # 改路径
  6. geneset = split(geneset$Metagene, geneset$Cell.type)
  7. re = gsva(exp, geneset, # exp为矩阵,数据框会报错
  8. method = 'ssgsea', kcdf = 'Gaussian', # 如果exp为log2后的,kcdf为Gaussian,如果kcdf为count,kcdf为Possion
  9. mx.diff = FALSE, verbose = FALSE)
  10. re = as.data.frame(re) # 数据框作图更方便
  11. annotation_col = data.frame(group = group,
  12. row.names = colnames(exp))
  13. p = pheatmap(re, scale = 'row',
  14. show_colnames = F,
  15. annotation_col = annotation_col,
  16. color = colorRampPalette(c("navy", "white", "firebrick3"))(100))

一个基本的热图就做好了,为了好看一些,下面是美化步骤(在上面的基础上接着跑)

  1. re = re[p$tree_row$order,]
  2. re = re[,p$tree_col$order]
  3. bk = c(seq(-2,-0.1,by=0.01),seq(0,2,by=0.01)) # 这些数值都可以自己调整
  4. ann_colors = list(
  5. group = c(cyst = "#F0F3BD", control = "#B8BEDD")
  6. ) # 定义注释的颜色,cyst和control为group的分类
  7. p = pheatmap(re, scale = 'row',
  8. show_colnames = F,
  9. cluster_cols = F,
  10. cluster_rows = F,
  11. annotation_col = annotation_col,
  12. annotation_colors = ann_colors,
  13. color = c(colorRampPalette(colors = c("#247BA0","white"))(length(bk)/2),
  14. colorRampPalette(colors = c("white","#F25F5C"))(length(bk)/2)),
  15. breaks=bk,
  16. cellwidth = 25, # 可以改
  17. cellheight = 15, # 可以改
  18. gaps_col = 13, # 可以改
  19. gaps_row = 16) # 可以改

看图↓

https://imgtu.com/i/otCO1A

ESTIMATE

ESTIMATE感觉没啥好说的,就直接跑

用来做基质打分、免疫打分和肿瘤纯度

借鉴了生信技能树的代码

  1. library(estimate)
  2. library(tibble)
  3. library(ggplot2) # 这个用ggplot2画图
  4. library(ggpubr)
  5. estimate <- function(dat,pro){
  6. input.f=paste0(pro,'_estimate_input.txt')
  7. output.f=paste0(pro,'_estimate_gene.gct')
  8. output.ds=paste0(pro,'_estimate_score.gct')
  9. write.table(dat,file = input.f,sep = '\t',quote = F)
  10. filterCommonGenes(input.f=input.f,
  11. output.f=output.f ,
  12. id="GeneSymbol")
  13. estimateScore(input.ds = output.f,
  14. output.ds=output.ds,
  15. platform="affymetrix") ## 注意platform,如果输入dat是count,则为illumina
  16. scores=read.table(output.ds,skip = 2,header = T)
  17. rownames(scores)=scores[,1]
  18. scores=t(scores[,3:ncol(scores)])
  19. return(scores)
  20. }
  21. pro= gse_number # 这里可以改成癌症或者其他工程名字
  22. dat = exp # exp为log2后的
  23. scores=estimate(dat,pro)
  24. scores = rownames_to_column(as.data.frame(scores),"id")
  25. scores$id = str_replace_all(scores$id,"\\.","-")
  26. scores$TumorPurity <- cos(0.6049872018+0.0001467884*scores[,4])
  27. group = rep(c('cyst','control'), c(13,8))
  28. group = factor(group, levels = unique(group))
  29. scores$group = group

作图

  1. ggplot(scores, aes(group, ImmuneScore))+
  2. geom_boxplot(aes(fill= group), show.legend = F) +
  3. scale_fill_manual(values = c("#E76F51", "#118AB2"))+
  4. geom_jitter(size = 1, alpha = 0.5)+
  5. theme_classic()+
  6. xlab("")+
  7. ylab("Immune Score")+
  8. labs(title="Title")+
  9. theme(plot.title = element_text(hjust = 0.5))+
  10. stat_compare_means(aes(label = ..p.signif..),
  11. comparisons = list(c('cyst','control')),
  12. na.rm = FALSE)+
  13. theme(panel.background = element_rect(colour = "black",
  14. size = 1))

CIBERSORT

我个人是真得非常不推荐用这个R包做,因为很麻烦!!!第四种方法IOBR做出的结果和这个是一样的,还很简单

用来做22种免疫细胞的比例图

需要提前准备好三个文件,CIBERSORT.R,LM22.txt,GSE7869_exp.txt(代码里有注释)

  1. library(e1071)
  2. library(parallel)
  3. library(preprocessCore)
  4. library(ggplot2)
  5. source('import/CIBERSORT.R') # cibersort 代码
  6. sig_matrix = 'LM22.txt' # 22种免疫细胞
  7. mixture_file = "GSE7869_exp.txt" # 把表达矩阵提前保存为txt格式,csv我会报错
  8. res_cibersort = CIBERSORT(sig_matrix, mixture_file, perm = 100, QN = TRUE) # 这步有点慢
  9. res_cibersort = res_cibersort[,1:22] # 23:25列不是免疫细胞
  10. ciber.res = res_cibersort[,colSums(res_cibersort)>0] # 去掉丰度全为0的细胞

作图

  1. mycol = alpha(rainbow(ncol(ciber.res)),0.7) # 创建彩虹色版(0.7透明度)
  2. par(bty = 'o', mgp = c(2.5,0.3,0), mar = c(2.1,5.1,2.1,20.1),
  3. tcl = -.25, las = 1, xpd = 1)
  4. barplot(as.matrix(t(ciber.res)),
  5. border = NA, # 柱子无边框线
  6. names.arg = rep("",nrow(ciber.res)), # 无横坐标样本名
  7. yaxt = 'n', # 先不绘制y轴
  8. ylab = 'Relatvie percentage',
  9. col = mycol)
  10. axis(side = 2, at = c(0, 0.2, 0.4, 0.6, 0.8, 1), # 补齐y轴添加百分号
  11. labels = c('0%','20%','40%','60%','80%','100%'))
  12. legend(par('usr')[2], # 可以在后面+-调整位置
  13. par('usr')[4],
  14. legend = colnames(ciber.res),
  15. xpd = T,
  16. fill = mycol,
  17. cex = 1,
  18. border = NA,
  19. y.intersp = 1,
  20. x.intersp = 1,
  21. bty = 'n')
  22. dev.off()

看图 (还是很漂亮的)

https://imgtu.com/i/ot5uc9

IOBR

和CIBERSORT一样,用来做免疫细胞比例的,推荐用这个,很方便

library(IOBR)
library(tidyverse)
library(ggplot2)
library(stringr)

cibersort2<-deconvo_tme(eset = exp, # log2之后的
                       method = "cibersort", 
                       arrays = TRUE, # 高通量为FALSE
                       perm = 200 )
cibersort2 = cibersort2[,1:23]
cibersort2 = column_to_rownames(cibersort2, 'ID')
ciber.res2 = cibersort2[,colSums(cibersort2)>0] # 去掉丰度全为0的细胞
colnames(ciber.res2) = str_split(colnames(ciber.res2), '_CIBERSORT',simplify = T)[,1]
colnames(ciber.res2) = gsub('_'," ",colnames(ciber.res2))

作图

mycol = alpha(rainbow(ncol(ciber.res2)),0.7) # 创建彩虹色版(0.7透明度)
par(bty = 'o', mgp = c(2.5,0.3,0), mar = c(2.1,5.1,2.1,20.1),
    tcl = -.25, las = 1, xpd = 1)
barplot(as.matrix(t(ciber.res2)),
        border = NA, # 柱子无边框线
        names.arg = rep("",nrow(ciber.res2)), # 无横坐标样本名
        yaxt = 'n', # 先不绘制y轴
        ylab = 'Relatvie percentage',
        col = mycol)
axis(side = 2, at = c(0, 0.2, 0.4, 0.6, 0.8, 1), # 补齐y轴添加百分号
     labels = c('0%','20%','40%','60%','80%','100%'))
legend(par('usr')[2], # 可以在后面+-调整位置
       par('usr')[4],
       legend = colnames(ciber.res2),
       xpd = T,
       fill = mycol,
       cex = 1,
       border = NA,
       y.intersp = 1,
       x.intersp = 1,
       bty = 'n')
dev.off()

图和上面那个一模一样,还不用准备那几个文件,完美~
此外,IOBR还可以做其他方法的免疫分析

IOBR集成了8种已发表的对肿瘤微环境 (TME) contexture进行decoding的方法:CIBERSORT、TIMER、xCell、MCPcounter、ESITMATE、EPIC、IPS、quantTIseq。此外,IOBR收集了255个已发表的特征基因集,涉及肿瘤微环境、肿瘤代谢、m6A、外泌体、微卫星不稳定性和三级淋巴结构。IOBR还采用了多种方法进行变量转换、可视化、批量生存分析、特征选择和统计分析,并且支持相应结果的批量分析和可视化。