Usage
Usage: enricher( gene, pvalueCutoff = 0.05, pAdjustMethod = "BH", universe, minGSSize = 10, maxGSSize = 500, qvalueCutoff = 0.2, TERM2GENE, TERM2NAME = NA )Arguments: gene: a vector of gene idpvalueCutoff: pvalue cutoff on enrichment tests to reportpAdjustMethod: one of "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"universe: background genes. If missing, the all genes listed in the database (eg TERM2GENE table) will be used as background.minGSSize: minimal size of genes annotated for testingmaxGSSize: maximal size of genes annotated for testingqvalueCutoff: qvalue cutoff on enrichment tests to report as significant. Tests must pass i) ‘pvalueCutoff’ on unadjusted pvalues, ii) ‘pvalueCutoff’ on adjusted pvalues and iii) ‘qvalueCutoff’ on qvalues to be reported.TERM2GENE: user input annotation of TERM TO GENE mapping, a data.frame of 2 column with term and geneTERM2NAME: user input of TERM TO NAME mapping, a data.frame of 2 column with term and name
Example
#!/usr/bin/env Rscriptlibrary(ggplot2)library(optparse)suppressMessages(library(showtext))suppressMessages(library(clusterProfiler))description <- " \033[1;32mDifferential Enrichment Analysis and Plot for KEGG/GO\033[0m"# =========================================== 参数解析 =========================================================option_list = list( make_option(c('-b', '--background'), help = 'The background file contains geneid and function information'), make_option(c('-g', '--diff_genes'), help = 'The different genes'), make_option(c('-t', '--type'), help = 'The type of your function, choices: kegg, go'), make_option(c('-x', '--ontology'), help = 'The ontology for GO enrichment, choices: BP, CC, MF'), make_option(c('-o', '--outdir'), help = 'The output directory [default: %default]', default = '.'), make_option(c('-n', '--number'), help = 'The max number used to plot [default: %default]', default = 10, type='integer'), make_option(c('-p', '--pcutoff'), help = 'The pvalue(p.adjust) cutoff [default: %default]', default = 0.05, type='double'), make_option(c('-m', '--method'), help = 'The adjustment method [default: %default]', default = 'BH'), make_option(c('-T', '--title'), help = 'The title of the picture'), make_option(c('-X', '--xlab'), help = 'The xlab of the picture'), make_option(c('-Y', '--ylab'), help = 'The ylab of the picture'), make_option(c('-W', '--width'), type = 'integer', help = 'The width of the picture'), make_option(c('-H', '--height'), type = 'integer', help = 'The height of the picture'))parser <- OptionParser(description = description, option_list = option_list, epilogue = 'contact: suqingdong@novogene.com')args <- parse_args(parser)if (is.null(args$background) || is.null(args$diff_genes)) { print_help(parser) quit('no')}# ============================================================================================================# # just for testing# args = data.frame(diff_genes = 'diff_gene.list',# background = 'geneid_kegg.tsv',# pcutoff = 0.05,# method = 'BH')# print(args)type = args$typeontology = args$ontologypcutoff = args$pcutoffmethod = args$methodnumber = args$number# showtext# showtext_auto()# font_add('Times New Roman', './times.ttf')# 差异基因:每行一个基因IDdiff_genes <- read.table(args$diff_genes)[, 1]# 背景基因:geneID <TAB> Termbg_data <- read.table(args$background, sep = '\t')colnames(bg_data) = c('geneid', 'term')# term去重,避免重复获取term_list <- unique(bg_data$term)# type未指定是自动判断if (is.null(args$type)) { if (startsWith(term_list[1], 'GO:')) { type = 'go' } else { type = 'kegg' }}# 添加注释if (type == 'kegg') { ko_names <- ko2name(term_list) merged_data <- merge(bg_data, ko_names, by.x = 'term', by.y = 'ko') term2gene <- data.frame(merged_data$term, merged_data$geneid) term2name <- data.frame(merged_data$term, merged_data$name)} else { go_terms <- go2term(term_list) go_onts <- go2ont(term_list) md <- merge(bg_data, go_terms, by.x = 'term', by.y = 'go_id') merged_data <- merge(md, go_onts, by.x = 'term', by.y = 'go_id') if( ! is.null(ontology) ) { merged_data <- merged_data[merged_data$Ontology==ontology, ] type <- paste(type, ontology, sep='.') } term2gene <- data.frame(merged_data$term, merged_data$geneid) term2name <- data.frame(merged_data$term, merged_data$Term)}# 富集分析result = enricher( diff_genes, TERM2GENE = term2gene, TERM2NAME = term2name, pvalueCutoff = pcutoff, qvalueCutoff = 1, pAdjustMethod = method # BH == FDR)# 结果转数据框result_df = data.frame(result)# 按p.adjust正序排序, 分值小的在上result_df = result_df[order(result_df$p.adjust, decreasing = F), ]# 保存文件outfile = sprintf('%s.result.xls', type)write.table(result_df, outfile, sep='\t', quote = F, row.names = F)# 截取前N行result_df = head(result_df, number)# Ratio列转换成数值calc_ratio = function(x) { res = as.numeric(unlist(strsplit(x, "/"))) res[1] / res[2]}result_df$GeneRatio = unlist(lapply(result_df$GeneRatio, calc_ratio))# Description列设置因子,用于控制显示顺序# 按p.adjust逆序排序, 分值小的图片中在上result_df = result_df[order(result_df$p.adjust, decreasing = T), ]result_df$Description = factor(result_df$Description, levels=result_df$Description)# 样式family = 'Times New Roman'size = 28angle = 0theme.style <- theme( text = element_text(family = family), axis.text = element_text(size = size), axis.text.y = element_text(angle = angle),)# 绘图# barplotoutbar = sprintf('%s.barplot.png', type)barplot <- ggplot(result_df, aes(x = Count, y = Description, fill = p.adjust)) + geom_bar(stat = 'identity') + theme_bw() + theme.style + scale_fill_gradient(low = "red", high = "blue")ggsave(outbar, barplot, type = 'cairo')sprintf('save file: %s', outbar)# dotplotoutdot = sprintf('%s.dotplot.png', type)dotplot <- ggplot(result_df, aes(x=GeneRatio, y=Description, color=p.adjust, size=Count)) + geom_point() + scale_color_gradient(low="red", high="blue") + theme_bw() + theme.styleggsave(outdot, dotplot, type='cairo')sprintf('save file: %s', outdot)