1. rm(list = ls())
    2. load(file = "step2output.Rdata")
    3. #差异分析,用limma包来做
    4. #需要表达矩阵和Group,不需要改
    5. library(limma)
    6. design=model.matrix(~Group)
    7. fit=lmFit(exp,design)
    8. fit=eBayes(fit)
    9. deg=topTable(fit,coef=2,number = Inf)
    1. #为deg数据框添加几列
    2. #1.加probe_id列,把行名变成一列
    3. library(dplyr)
    4. deg <- mutate(deg,probe_id=rownames(deg))
    5. head(deg)
    1. #2.加上探针注释
    2. ids = ids[!duplicated(ids$symbol),]

    加上基因名

    1. deg <- inner_join(deg,ids,by="probe_id")
    2. head(deg)
    3. nrow(deg)

    加质控列,设置阈值

    1. #3.加change列,标记上下调基因
    2. logFC_t=1 ##阈值
    3. P.Value_t = 0.05
    4. k1 = (deg$P.Value < P.Value_t)&(deg$logFC < -logFC_t)
    5. k2 = (deg$P.Value < P.Value_t)&(deg$logFC > logFC_t)
    6. deg <- mutate(deg,change = ifelse(k1,"down",
    7. ifelse(k2,"up","stable")))
    8. table(deg$change)

    加ENTREZID列,用于富集分析

    1. #4.加ENTREZID列,用于富集分析(symbol转entrezid,然后inner_join)
    2. library(clusterProfiler)
    3. library(org.Hs.eg.db)
    4. s2e <- bitr(deg$symbol,
    5. fromType = "SYMBOL",
    6. toType = "ENTREZID",
    7. OrgDb = org.Hs.eg.db)#人类,存放人类各种不同基因ID的包
    8. #其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
    9. dim(deg)
    1. deg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL")) ##列名不一样时必须这样写
    2. dim(deg)
    3. length(unique(deg$symbol)) ##查看有无重复
    4. save(Group,deg,logFC_t,P.Value_t,gse_number,file = "step4output.Rdata")