take “KIRC” for example
突变数据下载
https://portal.gdc.cancer.gov/repository
勾选:
然后点:
下载之后解压,得到一个tar.gz文件
数据读取
note:我的临床矩阵之前根据某些条件分成了三组,所以下面都是三组一起跑的
library(maftools)
library(dplyr)
library(stringr)
library(RColorBrewer)
library(ggplot2)
library(ggpubr)
library(patchwork)
library(tibble)
load('Rdata/train_exp_cl.Rdata') # 读取表达矩阵和临床信息
laml = read.maf(maf = 'import/TCGA.KIRC.mutect.somatic.maf.gz') #读取突变数据
计算突变负荷
没必要打包 一步一步跑吧
tmb = tmb(maf = laml)
tmb$Tumor_Sample_Barcode <- stringr::str_sub(tmb$Tumor_Sample_Barcode,1,12)
tmb <- as.data.frame(tmb)
tmb <- tmb[tmb$Tumor_Sample_Barcode %in% rownames(cl),]
rownames(tmb) = tmb[,1]
cl = cl[match(rownames(tmb), rownames(cl)),]
table(rownames(cl) == rownames(tmb)) # 注意这里要是TRUE
tmb$group = cl$group
p1 = ggplot(tmb, aes(group, total_perMB_log))+
geom_boxplot(aes(fill= group), show.legend = F) +
scale_fill_manual(values = c('#86d1d4','#98b4dc','#e5c9e0'))+
geom_jitter(size = 1, alpha = 0.5)+
theme_classic()+
xlab("")+
ylab("TMB")+
# labs(title="normal&tumor")+
theme(plot.title = element_text(hjust = 0.5))+
stat_compare_means(aes(label = ..p.signif..),
comparisons = list(c("group1","group2"),
c("group2","group3"),
c("group1","group3")),
na.rm = FALSE)+
theme(panel.background = element_rect(colour = "black",
size = 1))
突变分类
MAF = laml@data
MAF$Tumor_Sample_Barcode <- str_sub(MAF$Tumor_Sample_Barcode,1,12)
vc_cols <- RColorBrewer::brewer.pal(n = 9, name = 'Paired')
names(vc_cols) = names(table(MAF$Variant_Classification))[order(table(MAF$Variant_Classification),decreasing = T)]
# cl的group列是我的三个分组!!!
group1 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group1',]),]$Variant_Classification)
group2 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group2',]),]$Variant_Classification)
group3 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group3',]),]$Variant_Classification)
data = data.frame(group = rep(c('group1','group2','group3'),
c(4337,6266,627)),
mut = c(group1, group2, group3))
level = names(table(MAF$Variant_Classification))[order(table(MAF$Variant_Classification),decreasing = T)]
data$mut = factor(data$mut, levels = rev(level))
p2 = ggplot(data, aes(group, fill = mut))+
geom_bar(position = 'fill')+
scale_fill_manual(values = vc_cols)+
coord_flip()+
theme_classic()+
theme(panel.border = element_rect(fill=NA,
color="black",
size=1,
linetype="solid"))
突变类型
vt_cols <- RColorBrewer::brewer.pal(n = 3, name = 'Set3')
names(vt_cols) <- c("DEL", "INS", "SNP")
group1 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group1',]),]$Variant_Type)
group2 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group2',]),]$Variant_Type)
group3 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group3',]),]$Variant_Type)
data = data.frame(group = rep(c('group1','group2','group3'),
c(length(group1),length(group2),length(group3))),
mut = c(group1, group2, group3))
p3 = ggplot(data, aes(group, fill = mut))+
geom_bar(position = 'fill')+
scale_fill_manual(values = vt_cols)+
coord_flip()+
theme_classic()+
theme(panel.border = element_rect(fill=NA,
color="black",
size=1,
linetype="solid"))
突变top10基因
group = c('group1','group2','group3')
p = list()
for (i in 1:3){
maf <- MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == group[i],]),]
df <- as.data.frame(table(maf$Hugo_Symbol))
df <- df[order(df$Freq, decreasing = T),]
df <- maf[maf$Hugo_Symbol %in% df$Var1[1:10],]
df <- df[,c("Hugo_Symbol", "Variant_Classification")]
df$Hugo_Symbol <- factor(df$Hugo_Symbol,
levels = names(table(df$Hugo_Symbol))[order(table(df$Hugo_Symbol),decreasing = T)]
)
df$Variant_Classification <- factor(df$Variant_Classification,
levels = names(table(df$Variant_Classification))[order(table(df$Variant_Classification),decreasing = T)]
)
p[[i]] <- ggplot(df, aes(Hugo_Symbol, fill = Variant_Classification))+
geom_bar(position = 'stack')+
scale_fill_manual(values=vc_cols)+
theme_classic()+
xlab("")+
labs(title = paste("top10 mutated genes of", group[i], collapse = ' '))+
# theme(legend.position = "")+
scale_y_continuous(expand = c(0,0))+
coord_flip()+
theme(panel.background = element_rect(colour = "black",
size = 1))
}
p[[1]]+p[[2]]+p[[3]]
瀑布图
oncoplot(maf = laml, top = 15, fontSize = 0.7, colors = vc_cols)
看图 ↓
上一篇:Hmisc皮尔逊相关系数
下一篇:桑基图代码整理