四种方法做免疫浸润 - 《bulk转录组代码》

ssGSEA
ESTIMATE
CIBERSORT
IOBR

ssGSEA

免疫基因集来自：

Pan-cancer Immunogenomic Analyses Reveal Genotype-Immunophenotype Relationships and Predictors of Response to Checkpoint Blockade - ScienceDirect

从这篇文章里下载Table S6. List of Pan-cancer Immune Metagenes，另存为Immune_infiltraton_ggGSEA.csv

由于ssgsea是用预先定义的基因集去计算每组打分，所以基因集是可以自己定义的

关于输入数据格式

We calculate now GSV A enrichment scores for these gene sets using first the microarray data and then the RNA-seq integer count data. Note that the only requirement to do the latter is to set the argument kcdf=“Poisson” which is “Gaussian” by default. Note, however, that if our RNA-seq derived expression levels would be continous, such as log-CPMs, log-RPKMs or log-TPMs, the the default value of the kcdf argument should remain unchanged.

如果是芯片数据，log2即可用；如果是转录组数据，则count，RPKM，FPKM，TPM皆可用，使用count时须指明参数kcdf=“Poisson”。

代码：

library(GSEABase)
library(GSVA)
library(pheatmap)
load('exp_group.Rdata') # exp为表达矩阵，group是分组
geneset = read.csv('Immune_infiltration_ggGSEA.csv') # 改路径
geneset = split(geneset$Metagene, geneset$Cell.type)
re = gsva(exp, geneset,                   # exp为矩阵，数据框会报错
        method = 'ssgsea', kcdf = 'Gaussian', # 如果exp为log2后的，kcdf为Gaussian，如果kcdf为count，kcdf为Possion
            mx.diff = FALSE, verbose = FALSE)
re = as.data.frame(re) # 数据框作图更方便
annotation_col = data.frame(group = group,
                          row.names = colnames(exp))
p = pheatmap(re, scale = 'row',
               show_colnames = F,
                   annotation_col = annotation_col,
               color = colorRampPalette(c("navy", "white", "firebrick3"))(100))

一个基本的热图就做好了，为了好看一些，下面是美化步骤（在上面的基础上接着跑）

re = re[p$tree_row$order,]
re = re[,p$tree_col$order]
bk = c(seq(-2,-0.1,by=0.01),seq(0,2,by=0.01)) # 这些数值都可以自己调整
ann_colors = list(
  group = c(cyst = "#F0F3BD", control = "#B8BEDD")
) # 定义注释的颜色，cyst和control为group的分类
p = pheatmap(re, scale = 'row',
             show_colnames = F,
             cluster_cols = F,
             cluster_rows = F,
             annotation_col = annotation_col,
             annotation_colors = ann_colors,
             color = c(colorRampPalette(colors = c("#247BA0","white"))(length(bk)/2),
                       colorRampPalette(colors = c("white","#F25F5C"))(length(bk)/2)),
             breaks=bk,
             cellwidth = 25,  # 可以改
             cellheight = 15, # 可以改
             gaps_col = 13,   # 可以改 
             gaps_row = 16)   # 可以改

看图↓

https://imgtu.com/i/otCO1A

ESTIMATE

ESTIMATE感觉没啥好说的，就直接跑

用来做基质打分、免疫打分和肿瘤纯度

借鉴了生信技能树的代码

library(estimate)
library(tibble)
library(ggplot2) # 这个用ggplot2画图
library(ggpubr)
estimate <- function(dat,pro){
  input.f=paste0(pro,'_estimate_input.txt')
  output.f=paste0(pro,'_estimate_gene.gct')
  output.ds=paste0(pro,'_estimate_score.gct')
  write.table(dat,file = input.f,sep = '\t',quote = F)
  filterCommonGenes(input.f=input.f,
                    output.f=output.f ,
                    id="GeneSymbol")
  estimateScore(input.ds = output.f,
                output.ds=output.ds,
                platform="affymetrix")   ## 注意platform，如果输入dat是count，则为illumina
  scores=read.table(output.ds,skip = 2,header = T)
  rownames(scores)=scores[,1]
  scores=t(scores[,3:ncol(scores)])
  return(scores)
}
pro= gse_number # 这里可以改成癌症或者其他工程名字
dat = exp # exp为log2后的
scores=estimate(dat,pro)
scores = rownames_to_column(as.data.frame(scores),"id")
scores$id = str_replace_all(scores$id,"\\.","-")
scores$TumorPurity <- cos(0.6049872018+0.0001467884*scores[,4])
group = rep(c('cyst','control'), c(13,8))
group = factor(group, levels = unique(group))
scores$group = group

作图

ggplot(scores, aes(group, ImmuneScore))+
       geom_boxplot(aes(fill= group), show.legend = F) +
       scale_fill_manual(values = c("#E76F51", "#118AB2"))+
       geom_jitter(size = 1, alpha = 0.5)+
       theme_classic()+
       xlab("")+
       ylab("Immune Score")+
       labs(title="Title")+
       theme(plot.title = element_text(hjust = 0.5))+
       stat_compare_means(aes(label = ..p.signif..),
                          comparisons = list(c('cyst','control')),
                          na.rm = FALSE)+
       theme(panel.background = element_rect(colour = "black", 
                                        size = 1))

CIBERSORT

我个人是真得非常不推荐用这个R包做，因为很麻烦！！！第四种方法IOBR做出的结果和这个是一样的，还很简单

用来做22种免疫细胞的比例图

需要提前准备好三个文件，CIBERSORT.R，LM22.txt，GSE7869_exp.txt（代码里有注释）

library(e1071)
library(parallel)
library(preprocessCore)
library(ggplot2)
source('import/CIBERSORT.R') # cibersort 代码
sig_matrix = 'LM22.txt'      # 22种免疫细胞
mixture_file = "GSE7869_exp.txt" # 把表达矩阵提前保存为txt格式，csv我会报错
res_cibersort = CIBERSORT(sig_matrix, mixture_file, perm = 100, QN = TRUE) # 这步有点慢
res_cibersort = res_cibersort[,1:22] # 23:25列不是免疫细胞
ciber.res = res_cibersort[,colSums(res_cibersort)>0] # 去掉丰度全为0的细胞

作图

mycol = alpha(rainbow(ncol(ciber.res)),0.7) # 创建彩虹色版（0.7透明度）
par(bty = 'o', mgp = c(2.5,0.3,0), mar = c(2.1,5.1,2.1,20.1),
    tcl = -.25, las = 1, xpd = 1)
barplot(as.matrix(t(ciber.res)),
        border = NA, # 柱子无边框线
        names.arg = rep("",nrow(ciber.res)), # 无横坐标样本名
        yaxt = 'n', # 先不绘制y轴
        ylab = 'Relatvie percentage',
        col = mycol)
axis(side = 2, at = c(0, 0.2, 0.4, 0.6, 0.8, 1), # 补齐y轴添加百分号
     labels = c('0%','20%','40%','60%','80%','100%'))
legend(par('usr')[2], # 可以在后面+-调整位置
       par('usr')[4],
       legend = colnames(ciber.res),
       xpd = T,
       fill = mycol,
       cex = 1,
       border = NA,
       y.intersp = 1,
       x.intersp = 1,
       bty = 'n')
dev.off()

看图（还是很漂亮的）

https://imgtu.com/i/ot5uc9

IOBR

和CIBERSORT一样，用来做免疫细胞比例的，推荐用这个，很方便

library(IOBR)
library(tidyverse)
library(ggplot2)
library(stringr)

cibersort2<-deconvo_tme(eset = exp, # log2之后的
                       method = "cibersort", 
                       arrays = TRUE, # 高通量为FALSE
                       perm = 200 )
cibersort2 = cibersort2[,1:23]
cibersort2 = column_to_rownames(cibersort2, 'ID')
ciber.res2 = cibersort2[,colSums(cibersort2)>0] # 去掉丰度全为0的细胞
colnames(ciber.res2) = str_split(colnames(ciber.res2), '_CIBERSORT',simplify = T)[,1]
colnames(ciber.res2) = gsub('_'," ",colnames(ciber.res2))

作图

mycol = alpha(rainbow(ncol(ciber.res2)),0.7) # 创建彩虹色版（0.7透明度）
par(bty = 'o', mgp = c(2.5,0.3,0), mar = c(2.1,5.1,2.1,20.1),
    tcl = -.25, las = 1, xpd = 1)
barplot(as.matrix(t(ciber.res2)),
        border = NA, # 柱子无边框线
        names.arg = rep("",nrow(ciber.res2)), # 无横坐标样本名
        yaxt = 'n', # 先不绘制y轴
        ylab = 'Relatvie percentage',
        col = mycol)
axis(side = 2, at = c(0, 0.2, 0.4, 0.6, 0.8, 1), # 补齐y轴添加百分号
     labels = c('0%','20%','40%','60%','80%','100%'))
legend(par('usr')[2], # 可以在后面+-调整位置
       par('usr')[4],
       legend = colnames(ciber.res2),
       xpd = T,
       fill = mycol,
       cex = 1,
       border = NA,
       y.intersp = 1,
       x.intersp = 1,
       bty = 'n')
dev.off()

图和上面那个一模一样，还不用准备那几个文件，完美~
此外，IOBR还可以做其他方法的免疫分析

IOBR集成了8种已发表的对肿瘤微环境 (TME) contexture进行decoding的方法：CIBERSORT、TIMER、xCell、MCPcounter、ESITMATE、EPIC、IPS、quantTIseq。此外，IOBR收集了255个已发表的特征基因集，涉及肿瘤微环境、肿瘤代谢、m6A、外泌体、微卫星不稳定性和三级淋巴结构。IOBR还采用了多种方法进行变量转换、可视化、批量生存分析、特征选择和统计分析，并且支持相应结果的批量分析和可视化。