ssGSEA
免疫基因集来自:
从这篇文章里下载Table S6. List of Pan-cancer Immune Metagenes,另存为Immune_infiltraton_ggGSEA.csv
由于ssgsea是用预先定义的基因集去计算每组打分,所以基因集是可以自己定义的
关于输入数据格式
We calculate now GSV A enrichment scores for these gene sets using first the microarray data and then the RNA-seq integer count data. Note that the only requirement to do the latter is to set the argument kcdf=“Poisson” which is “Gaussian” by default. Note, however, that if our RNA-seq derived expression levels would be continous, such as log-CPMs, log-RPKMs or log-TPMs, the the default value of the kcdf argument should remain unchanged.
如果是芯片数据,log2即可用;如果是转录组数据,则count,RPKM,FPKM,TPM皆可用,使用count时须指明参数kcdf=“Poisson”。
代码:
library(GSEABase)
library(GSVA)
library(pheatmap)
load('exp_group.Rdata') # exp为表达矩阵,group是分组
geneset = read.csv('Immune_infiltration_ggGSEA.csv') # 改路径
geneset = split(geneset$Metagene, geneset$Cell.type)
re = gsva(exp, geneset, # exp为矩阵,数据框会报错
method = 'ssgsea', kcdf = 'Gaussian', # 如果exp为log2后的,kcdf为Gaussian,如果kcdf为count,kcdf为Possion
mx.diff = FALSE, verbose = FALSE)
re = as.data.frame(re) # 数据框作图更方便
annotation_col = data.frame(group = group,
row.names = colnames(exp))
p = pheatmap(re, scale = 'row',
show_colnames = F,
annotation_col = annotation_col,
color = colorRampPalette(c("navy", "white", "firebrick3"))(100))
一个基本的热图就做好了,为了好看一些,下面是美化步骤(在上面的基础上接着跑)
re = re[p$tree_row$order,]
re = re[,p$tree_col$order]
bk = c(seq(-2,-0.1,by=0.01),seq(0,2,by=0.01)) # 这些数值都可以自己调整
ann_colors = list(
group = c(cyst = "#F0F3BD", control = "#B8BEDD")
) # 定义注释的颜色,cyst和control为group的分类
p = pheatmap(re, scale = 'row',
show_colnames = F,
cluster_cols = F,
cluster_rows = F,
annotation_col = annotation_col,
annotation_colors = ann_colors,
color = c(colorRampPalette(colors = c("#247BA0","white"))(length(bk)/2),
colorRampPalette(colors = c("white","#F25F5C"))(length(bk)/2)),
breaks=bk,
cellwidth = 25, # 可以改
cellheight = 15, # 可以改
gaps_col = 13, # 可以改
gaps_row = 16) # 可以改
看图↓
ESTIMATE
ESTIMATE感觉没啥好说的,就直接跑
用来做基质打分、免疫打分和肿瘤纯度
借鉴了生信技能树的代码
library(estimate)
library(tibble)
library(ggplot2) # 这个用ggplot2画图
library(ggpubr)
estimate <- function(dat,pro){
input.f=paste0(pro,'_estimate_input.txt')
output.f=paste0(pro,'_estimate_gene.gct')
output.ds=paste0(pro,'_estimate_score.gct')
write.table(dat,file = input.f,sep = '\t',quote = F)
filterCommonGenes(input.f=input.f,
output.f=output.f ,
id="GeneSymbol")
estimateScore(input.ds = output.f,
output.ds=output.ds,
platform="affymetrix") ## 注意platform,如果输入dat是count,则为illumina
scores=read.table(output.ds,skip = 2,header = T)
rownames(scores)=scores[,1]
scores=t(scores[,3:ncol(scores)])
return(scores)
}
pro= gse_number # 这里可以改成癌症或者其他工程名字
dat = exp # exp为log2后的
scores=estimate(dat,pro)
scores = rownames_to_column(as.data.frame(scores),"id")
scores$id = str_replace_all(scores$id,"\\.","-")
scores$TumorPurity <- cos(0.6049872018+0.0001467884*scores[,4])
group = rep(c('cyst','control'), c(13,8))
group = factor(group, levels = unique(group))
scores$group = group
作图
ggplot(scores, aes(group, ImmuneScore))+
geom_boxplot(aes(fill= group), show.legend = F) +
scale_fill_manual(values = c("#E76F51", "#118AB2"))+
geom_jitter(size = 1, alpha = 0.5)+
theme_classic()+
xlab("")+
ylab("Immune Score")+
labs(title="Title")+
theme(plot.title = element_text(hjust = 0.5))+
stat_compare_means(aes(label = ..p.signif..),
comparisons = list(c('cyst','control')),
na.rm = FALSE)+
theme(panel.background = element_rect(colour = "black",
size = 1))
CIBERSORT
我个人是真得非常不推荐用这个R包做,因为很麻烦!!!第四种方法IOBR做出的结果和这个是一样的,还很简单
用来做22种免疫细胞的比例图
需要提前准备好三个文件,CIBERSORT.R,LM22.txt,GSE7869_exp.txt(代码里有注释)
library(e1071)
library(parallel)
library(preprocessCore)
library(ggplot2)
source('import/CIBERSORT.R') # cibersort 代码
sig_matrix = 'LM22.txt' # 22种免疫细胞
mixture_file = "GSE7869_exp.txt" # 把表达矩阵提前保存为txt格式,csv我会报错
res_cibersort = CIBERSORT(sig_matrix, mixture_file, perm = 100, QN = TRUE) # 这步有点慢
res_cibersort = res_cibersort[,1:22] # 23:25列不是免疫细胞
ciber.res = res_cibersort[,colSums(res_cibersort)>0] # 去掉丰度全为0的细胞
作图
mycol = alpha(rainbow(ncol(ciber.res)),0.7) # 创建彩虹色版(0.7透明度)
par(bty = 'o', mgp = c(2.5,0.3,0), mar = c(2.1,5.1,2.1,20.1),
tcl = -.25, las = 1, xpd = 1)
barplot(as.matrix(t(ciber.res)),
border = NA, # 柱子无边框线
names.arg = rep("",nrow(ciber.res)), # 无横坐标样本名
yaxt = 'n', # 先不绘制y轴
ylab = 'Relatvie percentage',
col = mycol)
axis(side = 2, at = c(0, 0.2, 0.4, 0.6, 0.8, 1), # 补齐y轴添加百分号
labels = c('0%','20%','40%','60%','80%','100%'))
legend(par('usr')[2], # 可以在后面+-调整位置
par('usr')[4],
legend = colnames(ciber.res),
xpd = T,
fill = mycol,
cex = 1,
border = NA,
y.intersp = 1,
x.intersp = 1,
bty = 'n')
dev.off()
看图 (还是很漂亮的)
IOBR
和CIBERSORT一样,用来做免疫细胞比例的,推荐用这个,很方便
library(IOBR)
library(tidyverse)
library(ggplot2)
library(stringr)
cibersort2<-deconvo_tme(eset = exp, # log2之后的
method = "cibersort",
arrays = TRUE, # 高通量为FALSE
perm = 200 )
cibersort2 = cibersort2[,1:23]
cibersort2 = column_to_rownames(cibersort2, 'ID')
ciber.res2 = cibersort2[,colSums(cibersort2)>0] # 去掉丰度全为0的细胞
colnames(ciber.res2) = str_split(colnames(ciber.res2), '_CIBERSORT',simplify = T)[,1]
colnames(ciber.res2) = gsub('_'," ",colnames(ciber.res2))
作图
mycol = alpha(rainbow(ncol(ciber.res2)),0.7) # 创建彩虹色版(0.7透明度)
par(bty = 'o', mgp = c(2.5,0.3,0), mar = c(2.1,5.1,2.1,20.1),
tcl = -.25, las = 1, xpd = 1)
barplot(as.matrix(t(ciber.res2)),
border = NA, # 柱子无边框线
names.arg = rep("",nrow(ciber.res2)), # 无横坐标样本名
yaxt = 'n', # 先不绘制y轴
ylab = 'Relatvie percentage',
col = mycol)
axis(side = 2, at = c(0, 0.2, 0.4, 0.6, 0.8, 1), # 补齐y轴添加百分号
labels = c('0%','20%','40%','60%','80%','100%'))
legend(par('usr')[2], # 可以在后面+-调整位置
par('usr')[4],
legend = colnames(ciber.res2),
xpd = T,
fill = mycol,
cex = 1,
border = NA,
y.intersp = 1,
x.intersp = 1,
bty = 'n')
dev.off()
图和上面那个一模一样,还不用准备那几个文件,完美~
此外,IOBR还可以做其他方法的免疫分析
IOBR集成了8种已发表的对肿瘤微环境 (TME) contexture进行decoding的方法:CIBERSORT、TIMER、xCell、MCPcounter、ESITMATE、EPIC、IPS、quantTIseq。此外,IOBR收集了255个已发表的特征基因集,涉及肿瘤微环境、肿瘤代谢、m6A、外泌体、微卫星不稳定性和三级淋巴结构。IOBR还采用了多种方法进行变量转换、可视化、批量生存分析、特征选择和统计分析,并且支持相应结果的批量分析和可视化。