rm(list = ls())load(file = "step2output.Rdata")
1. 需要表达矩阵和Group,不需要改
library(limma)design=model.matrix(~Group)fit=lmFit(exp, design)fit=eBayes(fit)deg=topTable(fit, coef=2, number = Inf)
2. 为deg数据框添加几列
2.1 加probe_id列,把行名变成一列
library(dplyr)deg <- mutate(deg, probe_id=rownames(deg))head(deg)
2.2 加上探针注释
ids = ids[!duplicated(ids$symbol),]
其他去重方式在zz.去重.R
deg <- inner_join(deg,ids,by="probe_id")head(deg)nrow(deg)
2.3 加change列,标记上下调基因
logFC_t=1P.Value_t = 0.05k1 = (deg$P.Value < P.Value_t)&(deg$logFC < -logFC_t)k2 = (deg$P.Value < P.Value_t)&(deg$logFC > logFC_t)deg <- mutate(deg, change = ifelse(k1, "down", ifelse(k2, "up", "stable")))table(deg$change)
2.4 加ENTREZID列,用于富集分析(symbol转entrezid,然后inner_join)
查询bioconductor,其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
library(clusterProfiler)library(org.Hs.eg.db)s2e <- bitr(deg$symbol,fromType = "SYMBOL",toType = "ENTREZID",OrgDb = org.Hs.eg.db)#人类
2.5 合并保存数据
dim(deg)deg <- inner_join(deg, s2e, by=c("symbol"="SYMBOL"))dim(deg)length(unique(deg$symbol))save(Group,deg,logFC_t,P.Value_t,gse_number,file = "step4output.Rdata")
