# Start from an empty global environment, then restore the objects saved by
# the previous step. rm() is kept from the original workflow; it clears only
# the global environment, not attached packages or options.
rm(list = ls())
loaded_objects <- load(file = "step2output.Rdata")

# The code below reads `exp`, `Group`, `ids` and `gse_number` without defining
# them, so they have to come from the file just loaded. Fail early with a clear
# message: otherwise a missing `exp` would resolve to base::exp (a function)
# and surface later as a confusing error.
required_objects <- c("exp", "Group", "ids", "gse_number")
missing_objects <- setdiff(required_objects, loaded_objects)
if (length(missing_objects) > 0) {
  stop(
    "step2output.Rdata is missing required object(s): ",
    paste(missing_objects, collapse = ", "),
    call. = FALSE
  )
}

1. 差异分析(limma):需要表达矩阵 exp 和分组信息 Group,这部分代码不需要改

  # Differential expression with limma: build the design matrix from Group,
  # fit the model on the expression matrix `exp`, then apply eBayes().
  library(limma)
  design <- model.matrix(~Group)
  fit <- eBayes(lmFit(exp, design))
  # coef = 2 selects the second coefficient of `design`; number = Inf asks for
  # every row rather than only the top hits.
  # NOTE(review): presumably Group has two levels, so coefficient 2 is the
  # group contrast - confirm the level order of Group.
  deg <- topTable(fit, coef = 2, number = Inf)

2. 为deg数据框添加几列

2.1 加probe_id列,把行名变成一列

  # Copy the row names of `deg` into an ordinary column called probe_id so they
  # can be used as a join key, then preview the result.
  library(dplyr)
  deg <- deg %>% mutate(probe_id = rownames(deg))
  head(deg)

2.2 加上探针注释

  # Keep only the first annotation row for each symbol, in the current row
  # order of `ids`.
  first_per_symbol <- !duplicated(ids$symbol)
  ids <- ids[first_per_symbol, ]

其他去重方式见 zz.去重.R

  # Attach the probe annotation: only rows whose probe_id occurs in both `deg`
  # and `ids` are kept. Then preview the table and report its row count.
  deg <- deg %>% inner_join(ids, by = "probe_id")
  head(deg)
  nrow(deg)

2.3 加change列,标记上下调基因

  # Cut-offs used to label a row as changed.
  logFC_t <- 1
  P.Value_t <- 0.05

  # k1: P.Value below the cut-off and logFC below -logFC_t ("down").
  # k2: P.Value below the cut-off and logFC above  logFC_t ("up").
  is_significant <- deg$P.Value < P.Value_t
  k1 <- is_significant & (deg$logFC < -logFC_t)
  k2 <- is_significant & (deg$logFC > logFC_t)

  # Rows matching neither mask are labelled "stable".
  deg <- mutate(
    deg,
    change = ifelse(k1, "down", ifelse(k2, "up", "stable"))
  )

  # Count how many rows fall into each label.
  table(deg$change)

2.4 加ENTREZID列,用于富集分析(symbol转entrezid,然后inner_join)

其他物种的 OrgDb 注释包可在 Bioconductor 查询:http://bioconductor.org/packages/release/BiocViews.html#___OrgDb

  # Translate the gene symbols in `deg` into ENTREZID identifiers using the
  # human annotation database; the result is a SYMBOL/ENTREZID lookup table.
  library(clusterProfiler)
  library(org.Hs.eg.db)
  s2e <- bitr(
    deg$symbol,
    fromType = "SYMBOL",
    toType = "ENTREZID",
    OrgDb = org.Hs.eg.db # human
  )

2.5 合并保存数据

  # Print the dimensions of `deg` before and after attaching the ENTREZID
  # column, so any rows gained or lost by the join are visible.
  dim(deg)
  deg <- deg %>% inner_join(s2e, by = c("symbol" = "SYMBOL"))
  dim(deg)

  # NOTE(review): if a symbol maps to several ENTREZIDs the join repeats that
  # row; comparing this count with the row count above reveals it. Confirm
  # whether downstream steps expect one row per symbol.
  length(unique(deg$symbol))

  # Store the objects listed below in step4output.Rdata.
  save(
    list = c("Group", "deg", "logFC_t", "P.Value_t", "gse_number"),
    file = "step4output.Rdata"
  )