Liu G, Zeng X, Wu B, et al. RNA-Seq analysis of peripheral blood mononuclear cells reveals unique transcriptional signatures associated with radiotherapy response of nasopharyngeal carcinoma and prognosis of head and neck cancer[J]. Cancer biology & therapy, 2020, 21(2): 139-146.

四种方法p值对比

Cox median km best cut
GNG10 <0.0001 0.00044 0.00055 0.00018
ZNF443 0.152 0.1 0.54 0.02

取上次分析 p 值最小/最大的两个基因:

生存分析不同方法p值对比 - 图1

  1. rm(list = ls())
  2. options(stringsAsFactors = F)
  3. load(file='../HNSC_expr.Rdata')
  4. exprSet=expr
  5. datExpr=log2(edgeR::cpm(exprSet)+1)
  6. load("../HNSC_phe.Rdata")
  7. dat <- phe
  8. dat$GNG10=c(t(datExpr['GNG10',]))
  9. dat$ZNF443=c(t(datExpr['ZNF443',]))
  10. head(dat)
  11. # race age stage radiation time event GNG10
  12. # TCGA.BB.4224.01A white 52 Stage III YES 278 0 5.611493
  13. # TCGA.H7.7774.01A white 75 Stage III YES 407 0 5.823435
  14. # TCGA.CV.6943.01A white 74 Stage III 602 1 5.759393
  15. # TCGA.CN.5374.01A white 56 Stage IVA YES 1732 1 5.685456
  16. # TCGA.CQ.6227.01A white 77 Stage III NO 129 1 5.759871
  17. # TCGA.CV.6959.01A white 48 Stage III NO 256 1 5.767435
  18. # ZNF443
  19. # TCGA.BB.4224.01A 5.611675
  20. # TCGA.H7.7774.01A 5.690211
  21. # TCGA.CV.6943.01A 5.663535
  22. # TCGA.CN.5374.01A 5.660077
  23. # TCGA.CQ.6227.01A 5.530966
  24. # TCGA.CV.6959.01A 5.661470

1 median method

  1. library(survival)
  2. library("survminer")
  3. library(ggplot2)

GNG10

  1. dat$gGNG10=ifelse(dat$GNG10<median(dat$GNG10),'Low GNG10','High GNG10')
  2. sfit_GNG10 = survfit(Surv(time,event) ~ gGNG10, data = dat)
  3. sfit_GNG10
  4. # Call: survfit(formula = Surv(time, event) ~ gGNG10, data = dat)
  5. #
  6. # 1 observation deleted due to missingness
  7. # n events median 0.95LCL 0.95UCL
  8. # gGNG10=High GNG10 250 125 1133 773 1718
  9. # gGNG10=Low GNG10 249 93 2002 1591 3059
  10. ggsurvplot(sfit_GNG10,
  11. pval = T,conf.int = T,
  12. risk.table = T,
  13. surv.median.line = 'hv',
  14. ggtheme = theme_bw(),
  15. palette = c('#E7B800','#2E9FDF'))+
  16. xlab('Months') + ylab('PFS')

生存分析不同方法p值对比 - 图2

ZNF443

  1. dat$gZNF443=ifelse(dat$ZNF443<median(dat$ZNF443),'Low ZNF443','High ZNF443')
  2. sfit_ZNF443 = survfit(Surv(time,event) ~ gZNF443, data = dat)
  3. sfit_ZNF443
  4. # Call: survfit(formula = Surv(time, event) ~ gZNF443, data = dat)
  5. #
  6. # 1 observation deleted due to missingness
  7. # n events median 0.95LCL 0.95UCL
  8. # gZNF443=High ZNF443 250 97 1838 1504 2717
  9. # gZNF443=Low ZNF443 249 121 1133 927 2083
  10. ggsurvplot(sfit_ZNF443,
  11. pval = T,conf.int = T,
  12. risk.table = T,
  13. surv.median.line = 'hv',
  14. ggtheme = theme_bw(),
  15. palette = c('#E7B800','#2E9FDF'))+
  16. xlab('Months') + ylab('PFS')

生存分析不同方法p值对比 - 图3

2 survivalROC package, km method

其实没太搞明白 predict.time 的设置原则

  1. boxplot(dat$time)
  2. library(survivalROC)
  3. cuttime=1000 ## 暂且设为1000

生存分析不同方法p值对比 - 图4

GNG10

  1. sroc=survivalROC(Stime = dat$time,
  2. status = dat$event,
  3. marker = dat$GNG10,
  4. predict.time = cuttime,
  5. method = 'KM')
  6. cut.op=sroc$cut.values[which.max(sroc$TP-sroc$FP)]
  7. cut.op
  8. # [1] 5.762685
  1. plot(sroc$FP,sroc$TP,type = 'l',xlim = c(0,1),ylim = c(0,1),
  2. xlab = paste('FP','\n','AUC=',round(sroc$AUC,3)),
  3. ylab = 'TP',main = 'survival ROC',col='red')
  4. abline(0,1)
  5. legend('bottomright',c('gene expression'),col = 'red',lty = c(1,1))

生存分析不同方法p值对比 - 图5

  1. dat$gGNG10=ifelse(dat$GNG10 < cut.op,'Low GNG10','High GNG10')
  2. sfit = survfit(Surv(time,event) ~ gGNG10, data = dat)
  3. sfit
  4. # Call: survfit(formula = Surv(time, event) ~ gGNG10, data = dat)
  5. #
  6. # 1 observation deleted due to missingness
  7. # n events median 0.95LCL 0.95UCL
  8. # gGNG10=High GNG10 256 128 1133 763 1718
  9. # gGNG10=Low GNG10 243 90 2002 1591 3059
  10. ggsurvplot(sfit,
  11. pval = T,conf.int = T,
  12. risk.table = T,
  13. surv.median.line = 'hv',
  14. ggtheme = theme_bw(),
  15. palette = c('#E7B800','#2E9FDF'))+
  16. xlab('Months') + ylab('PFS')

生存分析不同方法p值对比 - 图6

ZNF443

  1. cuttime=1000
  2. sroc=survivalROC(Stime = dat$time,
  3. status = dat$event,
  4. marker = dat$ZNF443,
  5. predict.time = cuttime,
  6. method = 'KM')
  7. cut.op=sroc$cut.values[which.max(sroc$TP-sroc$FP)]
  8. cut.op
  9. # [1] 5.747971
  1. plot(sroc$FP,sroc$TP,type = 'l',xlim = c(0,1),ylim = c(0,1),
  2. xlab = paste('FP','\n','AUC=',round(sroc$AUC,3)),
  3. ylab = 'TP',main = 'survival ROC',col='red')
  4. abline(0,1)
  5. legend('bottomright',c('gene expression'),col = 'red',lty = c(1,1))

生存分析不同方法p值对比 - 图7

  1. dat$gZNF443=ifelse(dat$ZNF443 < cut.op,'Low ZNF443','High ZNF443')
  2. sfit = survfit(Surv(time,event) ~ gZNF443, data = dat)
  3. sfit
  4. # Call: survfit(formula = Surv(time, event) ~ gZNF443, data = dat)
  5. #
  6. # 1 observation deleted due to missingness
  7. # n events median 0.95LCL 0.95UCL
  8. # gZNF443=High ZNF443 26 13 2570 395 NA
  9. # gZNF443=Low ZNF443 473 205 1641 1289 2002
  10. ggsurvplot(sfit,
  11. pval = T,conf.int = T,
  12. risk.table = T,
  13. surv.median.line = 'hv',
  14. ggtheme = theme_bw(),
  15. palette = c('#E7B800','#2E9FDF'))+
  16. xlab('Months') + ylab('PFS')

生存分析不同方法p值对比 - 图8

3 survminer package, find the best surv_cutpoint

  1. cut_point=surv_cutpoint(dat,time = 'time',event = 'event',
  2. variables = c('GNG10','ZNF443'))
  3. summary(cut_point)
  4. # cutpoint statistic
  5. # GNG10 5.762685 3.735037
  6. # ZNF443 5.702524 2.398513
  7. res_cat=surv_categorize(cut_point)
  8. head(res_cat)
  9. # time event GNG10 ZNF443
  10. # TCGA.BB.4224.01A 278 0 low low
  11. # TCGA.H7.7774.01A 407 0 high low
  12. # TCGA.CV.6943.01A 602 1 low low
  13. # TCGA.CN.5374.01A 1732 1 low low
  14. # TCGA.CQ.6227.01A 129 1 low low
  15. # TCGA.CV.6959.01A 256 1 high low

GNG10

  1. sfit = survfit(Surv(time,event) ~ GNG10, data = res_cat)
  2. sfit
  3. # Call: survfit(formula = Surv(time, event) ~ GNG10, data = res_cat)
  4. #
  5. # 1 observation deleted due to missingness
  6. # n events median 0.95LCL 0.95UCL
  7. # GNG10=high 255 128 1081 763 1671
  8. # GNG10=low 244 90 2002 1732 3314
  9. ggsurvplot(sfit,
  10. pval = T,conf.int = T,
  11. risk.table = T,
  12. surv.median.line = 'hv',
  13. ggtheme = theme_bw(),
  14. palette = c('#E7B800','#2E9FDF'))+
  15. xlab('Months') + ylab('PFS')

生存分析不同方法p值对比 - 图9

ZNF443

  1. sfit = survfit(Surv(time,event) ~ ZNF443, data = res_cat)
  2. sfit
  3. # Call: survfit(formula = Surv(time, event) ~ ZNF443, data = res_cat)
  4. #
  5. # 1 observation deleted due to missingness
  6. # n events median 0.95LCL 0.95UCL
  7. # ZNF443=high 103 34 2570 1641 NA
  8. # ZNF443=low 396 184 1398 998 1838
  9. ggsurvplot(sfit,
  10. pval = T,conf.int = T,
  11. risk.table = T,
  12. surv.median.line = 'hv',
  13. ggtheme = theme_bw(),
  14. palette = c('#E7B800','#2E9FDF'))+
  15. xlab('Months') + ylab('PFS')

生存分析不同方法p值对比 - 图10