Usage

  1. Usage:
  2. enricher(
  3. gene,
  4. pvalueCutoff = 0.05,
  5. pAdjustMethod = "BH",
  6. universe,
  7. minGSSize = 10,
  8. maxGSSize = 500,
  9. qvalueCutoff = 0.2,
  10. TERM2GENE,
  11. TERM2NAME = NA
  12. )
  13. Arguments:
  14. gene: a vector of gene id
  15. pvalueCutoff: pvalue cutoff on enrichment tests to report
  16. pAdjustMethod: one of "holm", "hochberg", "hommel", "bonferroni", "BH",
  17. "BY", "fdr", "none"
  18. universe: background genes. If missing, the all genes listed in the
  19. database (eg TERM2GENE table) will be used as background.
  20. minGSSize: minimal size of genes annotated for testing
  21. maxGSSize: maximal size of genes annotated for testing
  22. qvalueCutoff: qvalue cutoff on enrichment tests to report as
  23. significant. Tests must pass i) pvalueCutoff on unadjusted
  24. pvalues, ii) pvalueCutoff on adjusted pvalues and iii)
  25. qvalueCutoff on qvalues to be reported.
  26. TERM2GENE: user input annotation of TERM TO GENE mapping, a data.frame
  27. of 2 column with term and gene
  28. TERM2NAME: user input of TERM TO NAME mapping, a data.frame of 2 column
  29. with term and name

Example

  1. #!/usr/bin/env Rscript
  2. library(ggplot2)
  3. library(optparse)
  4. suppressMessages(library(showtext))
  5. suppressMessages(library(clusterProfiler))
  6. description <- "
  7. \033[1;32mDifferential Enrichment Analysis and Plot for KEGG/GO\033[0m
  8. "
  9. # =========================================== 参数解析 =========================================================
  10. option_list = list(
  11. make_option(c('-b', '--background'), help = 'The background file contains geneid and function information'),
  12. make_option(c('-g', '--diff_genes'), help = 'The different genes'),
  13. make_option(c('-t', '--type'), help = 'The type of your function, choices: kegg, go'),
  14. make_option(c('-x', '--ontology'), help = 'The ontology for GO enrichment, choices: BP, CC, MF'),
  15. make_option(c('-o', '--outdir'), help = 'The output directory [default: %default]', default = '.'),
  16. make_option(c('-n', '--number'), help = 'The max number used to plot [default: %default]', default = 10, type='integer'),
  17. make_option(c('-p', '--pcutoff'), help = 'The pvalue(p.adjust) cutoff [default: %default]', default = 0.05, type='double'),
  18. make_option(c('-m', '--method'), help = 'The adjustment method [default: %default]', default = 'BH'),
  19. make_option(c('-T', '--title'), help = 'The title of the picture'),
  20. make_option(c('-X', '--xlab'), help = 'The xlab of the picture'),
  21. make_option(c('-Y', '--ylab'), help = 'The ylab of the picture'),
  22. make_option(c('-W', '--width'), type = 'integer', help = 'The width of the picture'),
  23. make_option(c('-H', '--height'), type = 'integer', help = 'The height of the picture')
  24. )
  25. parser <- OptionParser(description = description,
  26. option_list = option_list,
  27. epilogue = 'contact: suqingdong@novogene.com')
  28. args <- parse_args(parser)
  29. if (is.null(args$background) || is.null(args$diff_genes)) {
  30. print_help(parser)
  31. quit('no')
  32. }
  33. # ============================================================================================================
  34. # # just for testing
  35. # args = data.frame(diff_genes = 'diff_gene.list',
  36. # background = 'geneid_kegg.tsv',
  37. # pcutoff = 0.05,
  38. # method = 'BH')
  39. # print(args)
  40. type = args$type
  41. ontology = args$ontology
  42. pcutoff = args$pcutoff
  43. method = args$method
  44. number = args$number
  45. # showtext
  46. # showtext_auto()
  47. # font_add('Times New Roman', './times.ttf')
  48. # 差异基因:每行一个基因ID
  49. diff_genes <- read.table(args$diff_genes)[, 1]
  50. # 背景基因:geneID <TAB> Term
  51. bg_data <- read.table(args$background, sep = '\t')
  52. colnames(bg_data) = c('geneid', 'term')
  53. # term去重,避免重复获取
  54. term_list <- unique(bg_data$term)
  55. # type未指定是自动判断
  56. if (is.null(args$type)) {
  57. if (startsWith(term_list[1], 'GO:')) {
  58. type = 'go'
  59. } else {
  60. type = 'kegg'
  61. }
  62. }
  63. # 添加注释
  64. if (type == 'kegg') {
  65. ko_names <- ko2name(term_list)
  66. merged_data <- merge(bg_data, ko_names, by.x = 'term', by.y = 'ko')
  67. term2gene <- data.frame(merged_data$term, merged_data$geneid)
  68. term2name <- data.frame(merged_data$term, merged_data$name)
  69. } else {
  70. go_terms <- go2term(term_list)
  71. go_onts <- go2ont(term_list)
  72. md <- merge(bg_data, go_terms, by.x = 'term', by.y = 'go_id')
  73. merged_data <- merge(md, go_onts, by.x = 'term', by.y = 'go_id')
  74. if( ! is.null(ontology) ) {
  75. merged_data <- merged_data[merged_data$Ontology==ontology, ]
  76. type <- paste(type, ontology, sep='.')
  77. }
  78. term2gene <- data.frame(merged_data$term, merged_data$geneid)
  79. term2name <- data.frame(merged_data$term, merged_data$Term)
  80. }
  81. # 富集分析
  82. result = enricher(
  83. diff_genes,
  84. TERM2GENE = term2gene,
  85. TERM2NAME = term2name,
  86. pvalueCutoff = pcutoff,
  87. qvalueCutoff = 1,
  88. pAdjustMethod = method # BH == FDR
  89. )
  90. # 结果转数据框
  91. result_df = data.frame(result)
  92. # 按p.adjust正序排序, 分值小的在上
  93. result_df = result_df[order(result_df$p.adjust, decreasing = F), ]
  94. # 保存文件
  95. outfile = sprintf('%s.result.xls', type)
  96. write.table(result_df, outfile, sep='\t', quote = F, row.names = F)
  97. # 截取前N行
  98. result_df = head(result_df, number)
  99. # Ratio列转换成数值
  100. calc_ratio = function(x) {
  101. res = as.numeric(unlist(strsplit(x, "/")))
  102. res[1] / res[2]
  103. }
  104. result_df$GeneRatio = unlist(lapply(result_df$GeneRatio, calc_ratio))
  105. # Description列设置因子,用于控制显示顺序
  106. # 按p.adjust逆序排序, 分值小的图片中在上
  107. result_df = result_df[order(result_df$p.adjust, decreasing = T), ]
  108. result_df$Description = factor(result_df$Description, levels=result_df$Description)
  109. # 样式
  110. family = 'Times New Roman'
  111. size = 28
  112. angle = 0
  113. theme.style <- theme(
  114. text = element_text(family = family),
  115. axis.text = element_text(size = size),
  116. axis.text.y = element_text(angle = angle),
  117. )
  118. # 绘图
  119. # barplot
  120. outbar = sprintf('%s.barplot.png', type)
  121. barplot <-
  122. ggplot(result_df, aes(x = Count, y = Description, fill = p.adjust)) +
  123. geom_bar(stat = 'identity') +
  124. theme_bw() +
  125. theme.style +
  126. scale_fill_gradient(low = "red", high = "blue")
  127. ggsave(outbar, barplot, type = 'cairo')
  128. sprintf('save file: %s', outbar)
  129. # dotplot
  130. outdot = sprintf('%s.dotplot.png', type)
  131. dotplot <-
  132. ggplot(result_df, aes(x=GeneRatio, y=Description, color=p.adjust, size=Count)) +
  133. geom_point() +
  134. scale_color_gradient(low="red", high="blue") +
  135. theme_bw() +
  136. theme.style
  137. ggsave(outdot, dotplot, type='cairo')
  138. sprintf('save file: %s', outdot)