image.png

  1. load("TCGA-CHOL_DEG.Rdata")
  2. load("TCGA-CHOL.Rdata")
  3. cg2 = rownames(DEG2)[DEG2$change !="NOT"]
  4. library(tinyarray)
  5. library(edgeR)
  6. dat = log2(cpm(exp)+1)
  7. draw_heatmap(dat[cg2,],Group,cluster_cols = F)
  8. dat2
  9. Group2 = sort(Group)
  10. dat2 = dat[,order(Group)]
  11. draw_heatmap(dat2[cg2,],Group2,cluster_cols = F)

三大R包差异分析

  1. rm(list = ls())
  2. load("DHA.Rdata")
  3. table(Group)
  4. #> Group
  5. #> DMSO DHA
  6. #> 3 3
  7. #deseq2----
  8. library(DESeq2)
  9. colData <- data.frame(row.names =colnames(exp),
  10. condition=Group)
  11. if(!file.exists(paste0(proj,"_dd.Rdata"))){
  12. dds <- DESeqDataSetFromMatrix(
  13. countData = exp,
  14. colData = colData,
  15. design = ~ condition)
  16. dds <- DESeq(dds)
  17. save(dds,file = paste0(proj,"_dd.Rdata"))
  18. }
  19. load(file = paste0(proj,"_dd.Rdata"))
  20. class(dds)
  21. #> [1] "DESeqDataSet"
  22. #> attr(,"package")
  23. #> [1] "DESeq2"
  24. res <- results(dds, contrast = c("condition",rev(levels(Group))))
  25. #constrast
  26. c("condition",rev(levels(Group)))
  27. #> [1] "condition" "DHA" "DMSO"
  28. class(res)
  29. #> [1] "DESeqResults"
  30. #> attr(,"package")
  31. #> [1] "DESeq2"
  32. DEG1 <- as.data.frame(res)
  33. DEG1 <- DEG1[order(DEG1$pvalue),]
  34. DEG1 = na.omit(DEG1)
  35. head(DEG1)
  36. #> baseMean log2FoldChange lfcSE stat pvalue padj
  37. #> CDH6 5006.549 -4.030551 0.10671818 -37.76818 0.000000e+00 0.000000e+00
  38. #> CXCL8 22087.170 4.958524 0.12501648 39.66296 0.000000e+00 0.000000e+00
  39. #> HMOX1 46652.918 2.995161 0.08194416 36.55124 1.704000e-292 1.163094e-288
  40. #> THBD 2832.828 5.025714 0.13858308 36.26499 5.768388e-288 2.952982e-284
  41. #> LIF 4105.206 4.812167 0.14857784 32.38819 4.025761e-230 1.648710e-226
  42. #> DUSP5 2620.024 4.334570 0.13469580 32.18044 3.314657e-227 1.131237e-223
  43. #添加change列标记基因上调下调
  44. logFC_t = 2
  45. pvalue_t = 0.05
  46. k1 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange < -logFC_t);table(k1)
  47. #> k1
  48. #> FALSE TRUE
  49. #> 19726 751
  50. k2 = (DEG1$pvalue < pvalue_t)&(DEG1$log2FoldChange > logFC_t);table(k2)
  51. #> k2
  52. #> FALSE TRUE
  53. #> 19858 619
  54. DEG1$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
  55. table(DEG1$change)
  56. #>
  57. #> DOWN NOT UP
  58. #> 751 19107 619
  59. head(DEG1)
  60. #> baseMean log2FoldChange lfcSE stat pvalue padj
  61. #> CDH6 5006.549 -4.030551 0.10671818 -37.76818 0.000000e+00 0.000000e+00
  62. #> CXCL8 22087.170 4.958524 0.12501648 39.66296 0.000000e+00 0.000000e+00
  63. #> HMOX1 46652.918 2.995161 0.08194416 36.55124 1.704000e-292 1.163094e-288
  64. #> THBD 2832.828 5.025714 0.13858308 36.26499 5.768388e-288 2.952982e-284
  65. #> LIF 4105.206 4.812167 0.14857784 32.38819 4.025761e-230 1.648710e-226
  66. #> DUSP5 2620.024 4.334570 0.13469580 32.18044 3.314657e-227 1.131237e-223
  67. #> change
  68. #> CDH6 DOWN
  69. #> CXCL8 UP
  70. #> HMOX1 UP
  71. #> THBD UP
  72. #> LIF UP
  73. #> DUSP5 UP
  74. #edgeR----
  75. library(edgeR)
  76. dge <- DGEList(counts=exp,group=Group)
  77. dge$samples$lib.size <- colSums(dge$counts)
  78. dge <- calcNormFactors(dge)
  79. design <- model.matrix(~Group)
  80. dge <- estimateGLMCommonDisp(dge, design)
  81. dge <- estimateGLMTrendedDisp(dge, design)
  82. dge <- estimateGLMTagwiseDisp(dge, design)
  83. fit <- glmFit(dge, design)
  84. fit <- glmLRT(fit)
  85. DEG2=topTags(fit, n=Inf)
  86. class(DEG2)
  87. #> [1] "TopTags"
  88. #> attr(,"package")
  89. #> [1] "edgeR"
  90. DEG2=as.data.frame(DEG2)
  91. head(DEG2)
  92. #> logFC logCPM LR PValue FDR
  93. #> LIF 4.780301 7.069581 571.9517 2.112394e-126 4.695851e-122
  94. #> PTGS2 5.970830 7.152826 565.5336 5.258853e-125 5.845215e-121
  95. #> THBD 4.992993 6.534219 564.2212 1.014763e-124 7.519395e-121
  96. #> CDH6 -4.062412 7.384277 532.6938 7.326559e-118 4.071735e-114
  97. #> CXCL8 4.926708 9.496259 470.0676 3.097901e-104 1.377327e-100
  98. #> TMEM158 5.147555 6.279693 437.4408 3.903283e-97 1.446166e-93
  99. k1 = (DEG2$PValue < pvalue_t)&(DEG2$logFC < -logFC_t)
  100. k2 = (DEG2$PValue < pvalue_t)&(DEG2$logFC > logFC_t)
  101. DEG2$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
  102. head(DEG2)
  103. #> logFC logCPM LR PValue FDR change
  104. #> LIF 4.780301 7.069581 571.9517 2.112394e-126 4.695851e-122 UP
  105. #> PTGS2 5.970830 7.152826 565.5336 5.258853e-125 5.845215e-121 UP
  106. #> THBD 4.992993 6.534219 564.2212 1.014763e-124 7.519395e-121 UP
  107. #> CDH6 -4.062412 7.384277 532.6938 7.326559e-118 4.071735e-114 DOWN
  108. #> CXCL8 4.926708 9.496259 470.0676 3.097901e-104 1.377327e-100 UP
  109. #> TMEM158 5.147555 6.279693 437.4408 3.903283e-97 1.446166e-93 UP
  110. table(DEG2$change)
  111. #>
  112. #> DOWN NOT UP
  113. #> 936 20586 708
  114. ###limma----
  115. library(limma)
  116. dge <- edgeR::DGEList(counts=exp)
  117. dge <- edgeR::calcNormFactors(dge)
  118. design <- model.matrix(~Group)
  119. v <- voom(dge,design, normalize="quantile")
  120. fit <- lmFit(v, design)
  121. fit= eBayes(fit)
  122. DEG3 = topTable(fit, coef=2, n=Inf)
  123. DEG3 = na.omit(DEG3)
  124. k1 = (DEG3$P.Value < pvalue_t)&(DEG3$logFC < -logFC_t)
  125. k2 = (DEG3$P.Value < pvalue_t)&(DEG3$logFC > logFC_t)
  126. DEG3$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
  127. table(DEG3$change)
  128. #>
  129. #> DOWN NOT UP
  130. #> 846 20611 773
  131. head(DEG3)
  132. #> logFC AveExpr t P.Value adj.P.Val B change
  133. #> HMOX1 3.023387 9.933877 59.51263 1.613427e-10 1.895736e-06 15.06135 UP
  134. #> CXCL8 5.010797 8.042577 58.11745 1.896200e-10 1.895736e-06 14.56756 UP
  135. #> PLIN2 3.029546 9.754344 54.12555 3.077709e-10 1.895736e-06 14.47996 UP
  136. #> SQSTM1 2.528669 10.930792 51.44786 4.346774e-10 1.895736e-06 14.22328 UP
  137. #> AKR1C1 3.492536 8.480250 50.22935 5.116695e-10 1.895736e-06 13.91592 UP
  138. #> CDH6 -4.028740 6.272248 -52.96068 3.568837e-10 1.895736e-06 13.67540 DOWN
  139. tj = data.frame(deseq2 = as.integer(table(DEG1$change)),
  140. edgeR = as.integer(table(DEG2$change)),
  141. limma_voom = as.integer(table(DEG3$change)),
  142. row.names = c("down","not","up")
  143. );tj
  144. #> deseq2 edgeR limma_voom
  145. #> down 751 936 846
  146. #> not 19107 20586 20611
  147. #> up 619 708 773
  148. save(DEG1,DEG2,DEG3,Group,tj,file = paste0(proj,"_DEG.Rdata"))

可视化

  1. library(ggplot2)
  2. library(tinyarray)
  3. exp[1:4,1:4]
  4. #> DMSO1 DMSO2 DMSO3 S25uMDNHA1
  5. #> MADD 1097 1586 1055 1527
  6. #> MAST2 2765 2951 2670 5872
  7. #> TRBJ2-5 2 1 1 1
  8. #> TRBJ2-4 1 1 1 1
  9. # cpm 去除文库大小的影响
  10. dat = log2(cpm(exp)+1)
  11. pca.plot = draw_pca(dat,Group);pca.plot
  1. save(pca.plot,file = paste0(proj,"_pcaplot.Rdata"))
  2. cg1 = rownames(DEG1)[DEG1$change !="NOT"]
  3. cg2 = rownames(DEG2)[DEG2$change !="NOT"]
  4. cg3 = rownames(DEG3)[DEG3$change !="NOT"]
  5. h1 = draw_heatmap(dat[cg1,],Group)
  6. h2 = draw_heatmap(dat[cg2,],Group)
  7. h3 = draw_heatmap(dat[cg3,],Group)
  8. v1 = draw_volcano(DEG1,pkg = 1,logFC_cutoff = logFC_t)
  9. v2 = draw_volcano(DEG2,pkg = 2,logFC_cutoff = logFC_t)
  10. v3 = draw_volcano(DEG3,pkg = 3,logFC_cutoff = logFC_t)
  11. library(patchwork)
  12. (h1 + h2 + h3) / (v1 + v2 + v3) +plot_layout(guides = 'collect') &theme(legend.position = "none")
  1. ggsave(paste0(proj,"_heat_vo.png"),width = 15,height = 10)

三大R包差异基因对比

  1. UP=function(df){
  2. rownames(df)[df$change=="UP"]
  3. }
  4. DOWN=function(df){
  5. rownames(df)[df$change=="DOWN"]
  6. }
  7. up = intersect(intersect(UP(DEG1),UP(DEG2)),UP(DEG3))
  8. down = intersect(intersect(DOWN(DEG1),DOWN(DEG2)),DOWN(DEG3))
  9. dat = log2(cpm(exp)+1)
  10. hp = draw_heatmap(dat[c(up,down),],Group)
  11. #上调、下调基因分别画维恩图
  12. up_genes = list(Deseq2 = UP(DEG1),
  13. edgeR = UP(DEG2),
  14. limma = UP(DEG3))
  15. down_genes = list(Deseq2 = DOWN(DEG1),
  16. edgeR = DOWN(DEG2),
  17. limma = DOWN(DEG3))
  18. up.plot <- draw_venn(up_genes,"UPgene")
  19. down.plot <- draw_venn(down_genes,"DOWNgene")
  20. #维恩图拼图,终于搞定
  21. library(patchwork)
  22. #up.plot + down.plot
  23. # 拼图
  24. pca.plot + hp+up.plot +down.plot+ plot_layout(guides = "collect")
  25. ggsave(paste0(proj,"_heat_ve_pca.png"),width = 15,height = 10)

分组聚类的热图

  1. library(ComplexHeatmap)
  2. library(circlize)
  3. col_fun = colorRamp2(c(-2, 0, 2), c("#2fa1dd", "white", "#f87669"))
  4. top_annotation = HeatmapAnnotation(
  5. cluster = anno_block(gp = gpar(fill = c("#f87669","#2fa1dd")),
  6. labels = levels(Group),
  7. labels_gp = gpar(col = "white", fontsize = 12)))
  8. m = Heatmap(t(scale(t(exp[c(up,down),]))),name = " ",
  9. col = col_fun,
  10. top_annotation = top_annotation,
  11. column_split = Group,
  12. show_heatmap_legend = T,
  13. border = F,
  14. show_column_names = F,
  15. show_row_names = F,
  16. use_raster = F,
  17. cluster_column_slices = F,
  18. column_title = NULL)
  19. m