take “KIRC” for example

突变数据下载

https://portal.gdc.cancer.gov/repository
勾选:
image.png image.png
然后点:
image.png
image.png
下载之后解压,得到一个tar.gz文件

数据读取

note:我的临床矩阵之前根据某些条件分成了三组,所以下面都是三组一起跑的

  1. library(maftools)
  2. library(dplyr)
  3. library(stringr)
  4. library(RColorBrewer)
  5. library(ggplot2)
  6. library(ggpubr)
  7. library(patchwork)
  8. library(tibble)
  9. load('Rdata/train_exp_cl.Rdata') # 读取表达矩阵和临床信息
  10. laml = read.maf(maf = 'import/TCGA.KIRC.mutect.somatic.maf.gz') #读取突变数据

计算突变负荷

没必要打包 一步一步跑吧

  1. tmb = tmb(maf = laml)
  2. tmb$Tumor_Sample_Barcode <- stringr::str_sub(tmb$Tumor_Sample_Barcode,1,12)
  3. tmb <- as.data.frame(tmb)
  4. tmb <- tmb[tmb$Tumor_Sample_Barcode %in% rownames(cl),]
  5. rownames(tmb) = tmb[,1]
  6. cl = cl[match(rownames(tmb), rownames(cl)),]
  7. table(rownames(cl) == rownames(tmb)) # 注意这里要是TRUE
  8. tmb$group = cl$group
  9. p1 = ggplot(tmb, aes(group, total_perMB_log))+
  10. geom_boxplot(aes(fill= group), show.legend = F) +
  11. scale_fill_manual(values = c('#86d1d4','#98b4dc','#e5c9e0'))+
  12. geom_jitter(size = 1, alpha = 0.5)+
  13. theme_classic()+
  14. xlab("")+
  15. ylab("TMB")+
  16. # labs(title="normal&tumor")+
  17. theme(plot.title = element_text(hjust = 0.5))+
  18. stat_compare_means(aes(label = ..p.signif..),
  19. comparisons = list(c("group1","group2"),
  20. c("group2","group3"),
  21. c("group1","group3")),
  22. na.rm = FALSE)+
  23. theme(panel.background = element_rect(colour = "black",
  24. size = 1))

突变分类

  1. MAF = laml@data
  2. MAF$Tumor_Sample_Barcode <- str_sub(MAF$Tumor_Sample_Barcode,1,12)
  3. vc_cols <- RColorBrewer::brewer.pal(n = 9, name = 'Paired')
  4. names(vc_cols) = names(table(MAF$Variant_Classification))[order(table(MAF$Variant_Classification),decreasing = T)]
  5. # cl的group列是我的三个分组!!!
  6. group1 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group1',]),]$Variant_Classification)
  7. group2 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group2',]),]$Variant_Classification)
  8. group3 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group3',]),]$Variant_Classification)
  9. data = data.frame(group = rep(c('group1','group2','group3'),
  10. c(4337,6266,627)),
  11. mut = c(group1, group2, group3))
  12. level = names(table(MAF$Variant_Classification))[order(table(MAF$Variant_Classification),decreasing = T)]
  13. data$mut = factor(data$mut, levels = rev(level))
  14. p2 = ggplot(data, aes(group, fill = mut))+
  15. geom_bar(position = 'fill')+
  16. scale_fill_manual(values = vc_cols)+
  17. coord_flip()+
  18. theme_classic()+
  19. theme(panel.border = element_rect(fill=NA,
  20. color="black",
  21. size=1,
  22. linetype="solid"))

突变类型

  1. vt_cols <- RColorBrewer::brewer.pal(n = 3, name = 'Set3')
  2. names(vt_cols) <- c("DEL", "INS", "SNP")
  3. group1 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group1',]),]$Variant_Type)
  4. group2 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group2',]),]$Variant_Type)
  5. group3 <- as.character(MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == 'group3',]),]$Variant_Type)
  6. data = data.frame(group = rep(c('group1','group2','group3'),
  7. c(length(group1),length(group2),length(group3))),
  8. mut = c(group1, group2, group3))
  9. p3 = ggplot(data, aes(group, fill = mut))+
  10. geom_bar(position = 'fill')+
  11. scale_fill_manual(values = vt_cols)+
  12. coord_flip()+
  13. theme_classic()+
  14. theme(panel.border = element_rect(fill=NA,
  15. color="black",
  16. size=1,
  17. linetype="solid"))

突变top10基因

  1. group = c('group1','group2','group3')
  2. p = list()
  3. for (i in 1:3){
  4. maf <- MAF[MAF$Tumor_Sample_Barcode %in% rownames(cl[cl$group == group[i],]),]
  5. df <- as.data.frame(table(maf$Hugo_Symbol))
  6. df <- df[order(df$Freq, decreasing = T),]
  7. df <- maf[maf$Hugo_Symbol %in% df$Var1[1:10],]
  8. df <- df[,c("Hugo_Symbol", "Variant_Classification")]
  9. df$Hugo_Symbol <- factor(df$Hugo_Symbol,
  10. levels = names(table(df$Hugo_Symbol))[order(table(df$Hugo_Symbol),decreasing = T)]
  11. )
  12. df$Variant_Classification <- factor(df$Variant_Classification,
  13. levels = names(table(df$Variant_Classification))[order(table(df$Variant_Classification),decreasing = T)]
  14. )
  15. p[[i]] <- ggplot(df, aes(Hugo_Symbol, fill = Variant_Classification))+
  16. geom_bar(position = 'stack')+
  17. scale_fill_manual(values=vc_cols)+
  18. theme_classic()+
  19. xlab("")+
  20. labs(title = paste("top10 mutated genes of", group[i], collapse = ' '))+
  21. # theme(legend.position = "")+
  22. scale_y_continuous(expand = c(0,0))+
  23. coord_flip()+
  24. theme(panel.background = element_rect(colour = "black",
  25. size = 1))
  26. }
  27. p[[1]]+p[[2]]+p[[3]]

瀑布图

  1. oncoplot(maf = laml, top = 15, fontSize = 0.7, colors = vc_cols)

看图 ↓
image.png