ssGSEA
免疫基因集来自:
从这篇文章里下载Table S6. List of Pan-cancer Immune Metagenes,另存为Immune_infiltraton_ggGSEA.csv
由于ssgsea是用预先定义的基因集去计算每组打分,所以基因集是可以自己定义的
关于输入数据格式
We calculate now GSV A enrichment scores for these gene sets using first the microarray data and then the RNA-seq integer count data. Note that the only requirement to do the latter is to set the argument kcdf=“Poisson” which is “Gaussian” by default. Note, however, that if our RNA-seq derived expression levels would be continous, such as log-CPMs, log-RPKMs or log-TPMs, the the default value of the kcdf argument should remain unchanged.
如果是芯片数据,log2即可用;如果是转录组数据,则count,RPKM,FPKM,TPM皆可用,使用count时须指明参数kcdf=“Poisson”。
代码:
library(GSEABase)library(GSVA)library(pheatmap)load('exp_group.Rdata') # exp为表达矩阵,group是分组geneset = read.csv('Immune_infiltration_ggGSEA.csv') # 改路径geneset = split(geneset$Metagene, geneset$Cell.type)re = gsva(exp, geneset, # exp为矩阵,数据框会报错method = 'ssgsea', kcdf = 'Gaussian', # 如果exp为log2后的,kcdf为Gaussian,如果kcdf为count,kcdf为Possionmx.diff = FALSE, verbose = FALSE)re = as.data.frame(re) # 数据框作图更方便annotation_col = data.frame(group = group,row.names = colnames(exp))p = pheatmap(re, scale = 'row',show_colnames = F,annotation_col = annotation_col,color = colorRampPalette(c("navy", "white", "firebrick3"))(100))
一个基本的热图就做好了,为了好看一些,下面是美化步骤(在上面的基础上接着跑)
re = re[p$tree_row$order,]re = re[,p$tree_col$order]bk = c(seq(-2,-0.1,by=0.01),seq(0,2,by=0.01)) # 这些数值都可以自己调整ann_colors = list(group = c(cyst = "#F0F3BD", control = "#B8BEDD")) # 定义注释的颜色,cyst和control为group的分类p = pheatmap(re, scale = 'row',show_colnames = F,cluster_cols = F,cluster_rows = F,annotation_col = annotation_col,annotation_colors = ann_colors,color = c(colorRampPalette(colors = c("#247BA0","white"))(length(bk)/2),colorRampPalette(colors = c("white","#F25F5C"))(length(bk)/2)),breaks=bk,cellwidth = 25, # 可以改cellheight = 15, # 可以改gaps_col = 13, # 可以改gaps_row = 16) # 可以改
看图↓
ESTIMATE
ESTIMATE感觉没啥好说的,就直接跑
用来做基质打分、免疫打分和肿瘤纯度
借鉴了生信技能树的代码
library(estimate)library(tibble)library(ggplot2) # 这个用ggplot2画图library(ggpubr)estimate <- function(dat,pro){input.f=paste0(pro,'_estimate_input.txt')output.f=paste0(pro,'_estimate_gene.gct')output.ds=paste0(pro,'_estimate_score.gct')write.table(dat,file = input.f,sep = '\t',quote = F)filterCommonGenes(input.f=input.f,output.f=output.f ,id="GeneSymbol")estimateScore(input.ds = output.f,output.ds=output.ds,platform="affymetrix") ## 注意platform,如果输入dat是count,则为illuminascores=read.table(output.ds,skip = 2,header = T)rownames(scores)=scores[,1]scores=t(scores[,3:ncol(scores)])return(scores)}pro= gse_number # 这里可以改成癌症或者其他工程名字dat = exp # exp为log2后的scores=estimate(dat,pro)scores = rownames_to_column(as.data.frame(scores),"id")scores$id = str_replace_all(scores$id,"\\.","-")scores$TumorPurity <- cos(0.6049872018+0.0001467884*scores[,4])group = rep(c('cyst','control'), c(13,8))group = factor(group, levels = unique(group))scores$group = group
作图
ggplot(scores, aes(group, ImmuneScore))+geom_boxplot(aes(fill= group), show.legend = F) +scale_fill_manual(values = c("#E76F51", "#118AB2"))+geom_jitter(size = 1, alpha = 0.5)+theme_classic()+xlab("")+ylab("Immune Score")+labs(title="Title")+theme(plot.title = element_text(hjust = 0.5))+stat_compare_means(aes(label = ..p.signif..),comparisons = list(c('cyst','control')),na.rm = FALSE)+theme(panel.background = element_rect(colour = "black",size = 1))
CIBERSORT
我个人是真得非常不推荐用这个R包做,因为很麻烦!!!第四种方法IOBR做出的结果和这个是一样的,还很简单
用来做22种免疫细胞的比例图
需要提前准备好三个文件,CIBERSORT.R,LM22.txt,GSE7869_exp.txt(代码里有注释)
library(e1071)library(parallel)library(preprocessCore)library(ggplot2)source('import/CIBERSORT.R') # cibersort 代码sig_matrix = 'LM22.txt' # 22种免疫细胞mixture_file = "GSE7869_exp.txt" # 把表达矩阵提前保存为txt格式,csv我会报错res_cibersort = CIBERSORT(sig_matrix, mixture_file, perm = 100, QN = TRUE) # 这步有点慢res_cibersort = res_cibersort[,1:22] # 23:25列不是免疫细胞ciber.res = res_cibersort[,colSums(res_cibersort)>0] # 去掉丰度全为0的细胞
作图
mycol = alpha(rainbow(ncol(ciber.res)),0.7) # 创建彩虹色版(0.7透明度)par(bty = 'o', mgp = c(2.5,0.3,0), mar = c(2.1,5.1,2.1,20.1),tcl = -.25, las = 1, xpd = 1)barplot(as.matrix(t(ciber.res)),border = NA, # 柱子无边框线names.arg = rep("",nrow(ciber.res)), # 无横坐标样本名yaxt = 'n', # 先不绘制y轴ylab = 'Relatvie percentage',col = mycol)axis(side = 2, at = c(0, 0.2, 0.4, 0.6, 0.8, 1), # 补齐y轴添加百分号labels = c('0%','20%','40%','60%','80%','100%'))legend(par('usr')[2], # 可以在后面+-调整位置par('usr')[4],legend = colnames(ciber.res),xpd = T,fill = mycol,cex = 1,border = NA,y.intersp = 1,x.intersp = 1,bty = 'n')dev.off()
看图 (还是很漂亮的)
IOBR
和CIBERSORT一样,用来做免疫细胞比例的,推荐用这个,很方便
library(IOBR)
library(tidyverse)
library(ggplot2)
library(stringr)
cibersort2<-deconvo_tme(eset = exp, # log2之后的
method = "cibersort",
arrays = TRUE, # 高通量为FALSE
perm = 200 )
cibersort2 = cibersort2[,1:23]
cibersort2 = column_to_rownames(cibersort2, 'ID')
ciber.res2 = cibersort2[,colSums(cibersort2)>0] # 去掉丰度全为0的细胞
colnames(ciber.res2) = str_split(colnames(ciber.res2), '_CIBERSORT',simplify = T)[,1]
colnames(ciber.res2) = gsub('_'," ",colnames(ciber.res2))
作图
mycol = alpha(rainbow(ncol(ciber.res2)),0.7) # 创建彩虹色版(0.7透明度)
par(bty = 'o', mgp = c(2.5,0.3,0), mar = c(2.1,5.1,2.1,20.1),
tcl = -.25, las = 1, xpd = 1)
barplot(as.matrix(t(ciber.res2)),
border = NA, # 柱子无边框线
names.arg = rep("",nrow(ciber.res2)), # 无横坐标样本名
yaxt = 'n', # 先不绘制y轴
ylab = 'Relatvie percentage',
col = mycol)
axis(side = 2, at = c(0, 0.2, 0.4, 0.6, 0.8, 1), # 补齐y轴添加百分号
labels = c('0%','20%','40%','60%','80%','100%'))
legend(par('usr')[2], # 可以在后面+-调整位置
par('usr')[4],
legend = colnames(ciber.res2),
xpd = T,
fill = mycol,
cex = 1,
border = NA,
y.intersp = 1,
x.intersp = 1,
bty = 'n')
dev.off()
图和上面那个一模一样,还不用准备那几个文件,完美~
此外,IOBR还可以做其他方法的免疫分析
IOBR集成了8种已发表的对肿瘤微环境 (TME) contexture进行decoding的方法:CIBERSORT、TIMER、xCell、MCPcounter、ESITMATE、EPIC、IPS、quantTIseq。此外,IOBR收集了255个已发表的特征基因集,涉及肿瘤微环境、肿瘤代谢、m6A、外泌体、微卫星不稳定性和三级淋巴结构。IOBR还采用了多种方法进行变量转换、可视化、批量生存分析、特征选择和统计分析,并且支持相应结果的批量分析和可视化。
