YTHDF1在卵巢癌的预后

1. 数据准备

下载

[cohort: GDC TCGA Ovarian Cancer (OV)]

读入/整合

  1. library(AnnotationDbi)
  2. library(org.Hs.eg.db)
  3. library(survival)
  4. library(survminer)
  • 表达矩阵
  1. expr <- read.table('./TCGA-OV.htseq_counts.tsv/TCGA-OV.htseq_counts.tsv',header = T,sep='\t')
  2. dim(expr)
  3. # [1] 60488 380
  4. expr[1:4,1:4]
  5. # Ensembl_ID TCGA.23.1120.01A TCGA.29.1695.01A TCGA.61.2003.01A
  6. # 1 ENSG00000000003.13 12.308908 12.298063 11.34430
  7. # 2 ENSG00000000005.5 3.000000 3.906891 1.00000
  8. # 3 ENSG00000000419.11 11.587309 11.433585 11.84549
  9. # 4 ENSG00000000457.12 9.147205 10.601771 10.23122
  10. tail(expr[,1])
  11. # [1] "ENSGR0000281849.1" "__no_feature" "__ambiguous"
  12. # [4] "__too_low_aQual" "__not_aligned" "__alignment_not_unique"
  13. expr <- expr[1:(nrow(expr)-5),]
  14. class(expr[,1])
  15. # [1] "factor"
  16. expr[,1] <- as.character(expr[,1])
  17. for (i in 1:nrow(expr)) {
  18. expr[i,1] <- strsplit(expr[i,1],"\\.")[[1]][1]
  19. }
  20. symb <- select(org.Hs.eg.db,expr[,1],"SYMBOL","ENSEMBL")
  21. table(symb[,2]=="YTHDF1")
  22. #
  23. # FALSE TRUE
  24. # 25590 1
  25. which(symb$SYMBOL=="YTHDF1")
  26. # [1] 9300
  27. thegene <- symb[9300,]
  28. thegene <- expr[expr$Ensembl_ID==thegene$ENSEMBL,]
  29. thegene <- thegene[,-1]
  30. row.names(thegene) <- "YTHDF1"
  31. thegene <- round(thegene,digits = 2)
  32. save(thegene,file='YTHDF1expression.Rdata')
  • 临床数据
  1. clin <- read.table('./TCGA-HNSC.GDC_phenotype.tsv',header = T,sep='\t')
  2. summary(clin)
  3. clin[1:4,1:4]
  4. clin$submitter_id.samples = gsub('-','.',clin$submitter_id.samples)
  5. row.names(clin) <- clin[,1]
  6. clin <- clin[c("age_at_initial_pathologic_diagnosis","clinical_stage","person_neoplasm_cancer_status",
  7. "radiation_therapy","race.demographic")]
  8. colnames(clin) <- c("age","stage","status","radiation","race")
  9. clin <- na.omit(clin)
  10. save(clin,file='clinical.Rdata')
  • 生存数据
  1. surv <- read.table('./TCGA-OV.survival.tsv/TCGA-OV.survival.tsv',header = T,sep='\t')
  • 整合数据(挑选病人)
  1. surv$sample <- gsub('-','.',surv$sample)
  2. row.names(surv) <- surv[,1]
  3. surv <- surv[surv$sample %in% row.names(clin),]
  4. clin <- clin[row.names(clin) %in% surv$sample,]
  5. phe <- cbind(clin,surv[,c("OS.time","OS")])
  6. thegene <- thegene[,colnames(thegene) %in% row.names(phe)]
  7. phe <- phe[row.names(phe) %in% colnames(thegene),]
  8. # table(row.names(phe) %in% colnames(thegene))
  9. dim(phe)
  10. # [1] 188 7
  11. save(phe,file='phe_surv.Rdata')

2. cox

  1. mySurv=with(phe,Surv(OS.time, OS))

univariate cox regression

  1. unicox <-apply(thegene, 1 ,function(gene){
  2. group=ifelse(gene>median(gene),'high','low')
  3. survival_dat <- data.frame(group=group,stage=phe$stage,age=phe$age,status=phe$status,
  4. radiation=phe$radiation,race=phe$race,stringsAsFactors = F)
  5. m=coxph(mySurv ~group, data = survival_dat)
  6. beta <- coef(m)
  7. se <- sqrt(diag(vcov(m)))
  8. HR <- exp(beta)
  9. HRse <- HR * se
  10. tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1),
  11. HR = HR, HRse = HRse,
  12. HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1),
  13. HRCILL = exp(beta - qnorm(.975, 0, 1) * se),
  14. HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3)
  15. return(tmp['grouplow',])
  16. })
  17. unicox=t(unicox)
  18. table(unicox[,4]<0.05)
  19. #
  20. # FALSE
  21. # 1
  22. unicox[,4]
  23. # [1] 0.933

multivariate cox regression

  1. multicox <-apply(thegene, 1 ,function(gene){
  2. group=ifelse(gene>median(gene),'high','low')
  3. survival_dat <- data.frame(group=group,stage=phe$stage,age=phe$age,status=phe$status,
  4. radiation=phe$radiation,race=phe$race,stringsAsFactors = F)
  5. m=coxph(mySurv ~ age + stage+ group+status+radiation+race, data = survival_dat)
  6. beta <- coef(m)
  7. se <- sqrt(diag(vcov(m)))
  8. HR <- exp(beta)
  9. HRse <- HR * se
  10. tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1),
  11. HR = HR, HRse = HRse,
  12. HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1),
  13. HRCILL = exp(beta - qnorm(.975, 0, 1) * se),
  14. HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3)
  15. return(tmp['grouplow',])
  16. })
  17. multicox=t(multicox)
  18. table(multicox[,4]<0.05)
  19. #
  20. # FALSE
  21. # 1
  22. multicox[,4]
  23. # [1] 0.982

作图

  1. group=ifelse(as.numeric(thegene)>median(as.numeric(thegene)),'high','low')
  2. phe$group <- group
  3. ggsurvplot(survfit(Surv(OS.time, OS)~group, data=phe), pval=TRUE)

Prognosis of YTHDF1 in OV cancer - 图1