数据下载

UCSC Xena: https://xenabrowser.net/datapages/

cohort: GDC TCGA Head and Neck Cancer (HNSC) TCGA-HNSC Cox survival analysis - 图1

work with R

1. 读入及初步处理

  1. # 临床数据
  2. clin <- read.table('./TCGA-HNSC.GDC_phenotype.tsv',header = T,sep='\t')
  3. summary(clin)
  4. clin[1:4,1:4]
  5. save(clin,file='clinical.Rdata')
  1. # 表达矩阵
  2. suppressPackageStartupMessages(library(org.Hs.eg.db))
  3. suppressPackageStartupMessages(library(AnnotationDbi))
  4. expr <- read.table('./TCGA-HNSC.htseq_counts.tsv',header = T,sep='\t')
  5. dim(expr)
  6. # [1] 60488 547
  7. expr[1:4,1:4]
  8. # Ensembl_ID TCGA.BB.4224.01A TCGA.H7.7774.01A TCGA.CV.6943.01A
  9. # 1 ENSG00000000003.13 11.127994 11.420487 11.39178
  10. # 2 ENSG00000000005.5 1.584963 0.000000 0.00000
  11. # 3 ENSG00000000419.11 10.650154 10.724514 10.68825
  12. # 4 ENSG00000000457.12 10.055282 9.651052 9.84235
  13. enb <- as.character(expr[,1])
  14. enb_new <- c()
  15. for (i in 1:length(enb)) {
  16. enb_new[i] <- strsplit(enb[i],"\\.")[[1]][1]
  17. }
  18. enb_new <- unique(enb_new)
  19. symb <- AnnotationDbi::select(org.Hs.eg.db,enb_new,"SYMBOL","ENSEMBL")
  20. todelet <- match(symb[,1],enb_new )
  21. expr[,1] <- enb_new
  22. expr <- expr[todelet,]
  23. expr[,1] <- symb[,2]
  24. gs <- expr$Hugo_Symbol
  25. expr <- expr[,-1]
  26. rownames(expr) <- gs
  27. expr <- log2(expr+1)
  28. expr <- apply(expr, 2, function(x){
  29. as.numeric(format(x,digits = 2))
  30. })
  31. save(expr,file='expression.Rdata')
  1. clin$submitter_id.samples = gsub('-','.',clin$submitter_id.samples)
  2. phe <- clin[match(clin$submitter_id.samples,colnames(expr)),]
  3. phe <- phe[is.na(phe[,1]) == FALSE,]
  1. # OS
  2. surv <- read.table('./TCGA-HNSC.survival.tsv',header = T,sep='\t')
  3. geneselected <- expr[c("CHAC2","CLEC9A","GNG10","JCHAIN","KLRB1",
  4. "NOG","OLR1","PRELID2","SYT1","VWCE","ZNF443"),]
  5. surv$sample = gsub('-','.',surv$sample)

3. 整合数据

  1. dat <- t(geneselected)
  2. OSinfo <- surv[,c("OS.time","OS")]
  3. rownames(OSinfo) <- surv$sample
  4. dat2 <- dat[row.names(dat) %in% row.names(OSinfo),] %>% na.omit()
  5. OSinfo <- OSinfo[row.names(dat2),]
  6. dat2 <- cbind(dat2,OSinfo)
  7. library(reshape2)
  8. dat3 <- melt(dat2,c("OS.time","OS"))
  9. colnames(dat3)[3] <- "gene"
  10. save(dat3,file = "df_survivalplot.Rdata")

4. cox survival analysis

  1. library(survival)
  2. library(survminer)
  3. dat3$OS.time <- as.numeric(dat3$OS.time)
  4. dat3$group <- ifelse(dat3$value >= 2.8, "High","Low")
  5. mySurv_OS <- Surv(dat3$OS.time,dat3$OS)
  6. coxfit <- coxph(mySurv_OS ~ gene + group,data = dat3)
  7. summary(coxfit)
  1. # Call:
  2. # coxph(formula = mySurv_OS ~ gene + group, data = dat3)
  3. #
  4. # n= 5995, number of events= 2772
  5. #
  6. # coef exp(coef) se(coef) z Pr(>|z|)
  7. # geneCLEC9A -0.0251926 0.9751221 0.0994572 -0.253 0.800
  8. # geneGNG10 0.0008413 1.0008417 0.0890996 0.009 0.992
  9. # geneJCHAIN -0.0006175 0.9993826 0.0890938 -0.007 0.994
  10. # geneKLRB1 -0.0130832 0.9870020 0.0920338 -0.142 0.887
  11. # geneNOG -0.0240868 0.9762010 0.0986196 -0.244 0.807
  12. # geneOLR1 -0.0045179 0.9954923 0.0894466 -0.051 0.960
  13. # genePRELID2 -0.0024808 0.9975223 0.0891958 -0.028 0.978
  14. # geneSYT1 -0.0109217 0.9891377 0.0911551 -0.120 0.905
  15. # geneVWCE -0.0203458 0.9798598 0.0960100 -0.212 0.832
  16. # geneZNF443 -0.0076639 0.9923654 0.0901146 -0.085 0.932
  17. # groupLow 0.0294441 1.0298819 0.0516385 0.570 0.569
  18. #
  19. # exp(coef) exp(-coef) lower .95 upper .95
  20. # geneCLEC9A 0.9751 1.0255 0.8024 1.185
  21. # geneGNG10 1.0008 0.9992 0.8405 1.192
  22. # geneJCHAIN 0.9994 1.0006 0.8393 1.190
  23. # geneKLRB1 0.9870 1.0132 0.8241 1.182
  24. # geneNOG 0.9762 1.0244 0.8046 1.184
  25. # geneOLR1 0.9955 1.0045 0.8354 1.186
  26. # genePRELID2 0.9975 1.0025 0.8375 1.188
  27. # geneSYT1 0.9891 1.0110 0.8273 1.183
  28. # geneVWCE 0.9799 1.0206 0.8118 1.183
  29. # geneZNF443 0.9924 1.0077 0.8317 1.184
  30. # groupLow 1.0299 0.9710 0.9307 1.140
  31. # Concordance= 0.507 (se = 0.006 )
  32. # Likelihood ratio test= 0.32 on 11 df, p=1
  33. # Wald test = 0.33 on 11 df, p=1
  34. # Score (logrank) test = 0.33 on 11 df, p=1

各基因单独cox

  1. genegroup <- split(dat3,dat3$gene)
  2. for (i in 1:length(genegroup)) {
  3. x <- genegroup[[i]]
  4. x$group <- ifelse(x$value >= 2.8, "High","Low")
  5. # table(x$group)
  6. mySurv_OS <- Surv(x$OS.time,x$OS)
  7. coxfit[[i]] <- coxph(mySurv_OS ~ group,data = x)
  8. # coxres <- summary(coxfit)
  9. }

结果汇总:

  1. ## CHAC2
  2. coxfit[[1]]
  3. # Call:
  4. # coxph(formula = mySurv_OS ~ group, data = x)
  5. #
  6. # coef exp(coef) se(coef) z p
  7. # groupLow -0.1756 0.8390 0.3399 -0.517 0.605
  8. #
  9. # Likelihood ratio test=0.28 on 1 df, p=0.5955
  10. # n= 545, number of events= 252
  11. ## CLEC9A
  12. # Call:
  13. # coxph(formula = mySurv_OS ~ group, data = x)
  14. #
  15. # coef exp(coef) se(coef) z p
  16. # groupLow 0.6239 1.8662 0.2755 2.265 0.0235
  17. #
  18. # Likelihood ratio test=6.19 on 1 df, p=0.01285
  19. # n= 545, number of events= 252
  20. ## GNG10
  21. # Call:
  22. # coxph(formula = mySurv_OS ~ group, data = x)
  23. #
  24. # coef exp(coef) se(coef) z p
  25. # groupLow -0.1834 0.8324 0.5919 -0.31 0.757
  26. #
  27. # Likelihood ratio test=0.1 on 1 df, p=0.7499
  28. # n= 545, number of events= 252
  29. ## JCHAIN
  30. # Call:
  31. # coxph(formula = mySurv_OS ~ group, data = x)
  32. #
  33. # coef exp(coef) se(coef) z p
  34. # groupLow 0.2500 1.2840 0.2341 1.068 0.286
  35. #
  36. # Likelihood ratio test=1.06 on 1 df, p=0.3021
  37. # n= 545, number of events= 252
  38. ## KLRB1
  39. # Call:
  40. # coxph(formula = mySurv_OS ~ group, data = x)
  41. #
  42. # coef exp(coef) se(coef) z p
  43. # groupLow 0.3573 1.4294 0.1279 2.793 0.00522
  44. #
  45. # Likelihood ratio test=7.9 on 1 df, p=0.004951
  46. # n= 545, number of events= 252
  47. ## NOG
  48. # Call:
  49. # coxph(formula = mySurv_OS ~ group, data = x)
  50. #
  51. # coef exp(coef) se(coef) z p
  52. # groupLow -0.1164 0.8901 0.1763 -0.66 0.509
  53. #
  54. # Likelihood ratio test=0.42 on 1 df, p=0.5147
  55. # n= 545, number of events= 252
  56. ## OLR1
  57. # Call:
  58. # coxph(formula = mySurv_OS ~ group, data = x)
  59. #
  60. # coef exp(coef) se(coef) z p
  61. # groupLow -0.1024 0.9027 0.1647 -0.622 0.534
  62. #
  63. # Likelihood ratio test=0.39 on 1 df, p=0.5297
  64. # n= 545, number of events= 252
  65. ## PRELID2
  66. # Call:
  67. # coxph(formula = mySurv_OS ~ group, data = x)
  68. #
  69. # coef exp(coef) se(coef) z p
  70. # groupLow -0.4077 0.6652 0.2245 -1.816 0.0693
  71. #
  72. # Likelihood ratio test=3.68 on 1 df, p=0.05491
  73. # n= 545, number of events= 252
  74. ## SYT1
  75. # Call:
  76. # coxph(formula = mySurv_OS ~ group, data = x)
  77. #
  78. # coef exp(coef) se(coef) z p
  79. # groupLow -0.2620 0.7695 0.1324 -1.979 0.0478
  80. #
  81. # Likelihood ratio test=4.01 on 1 df, p=0.04519
  82. # n= 545, number of events= 252
  83. ## VWCE33
  84. # Call:
  85. # coxph(formula = mySurv_OS ~ group, data = x)
  86. #
  87. # coef exp(coef) se(coef) z p
  88. # groupLow 0.2104 1.2341 0.1508 1.395 0.163
  89. #
  90. # Likelihood ratio test=2.02 on 1 df, p=0.1554
  91. # n= 545, number of events= 252
  92. ## ZNF443
  93. # Call:
  94. # coxph(formula = mySurv_OS ~ group, data = x)
  95. #
  96. # coef exp(coef) se(coef) z p
  97. # groupLow -0.02915 0.97127 0.13861 -0.21 0.833
  98. #
  99. # Likelihood ratio test=0.04 on 1 df, p=0.8331
  100. # n= 545, number of events= 252