Liu G, Zeng X, Wu B, et al. RNA-Seq analysis of peripheral blood mononuclear cells reveals unique transcriptional signatures associated with radiotherapy response of nasopharyngeal carcinoma and prognosis of head and neck cancer[J]. Cancer biology & therapy, 2020, 21(2): 139-146.

读入表达矩阵

发现上次的一个错误:

Ensembl ID 转换 symbol 后,表达矩阵没有去重,行名有重复(虽然不知道为什么没有报错….

  1. library(AnnotationDbi)
  2. library(org.Hs.eg.db)
  3. library(survival)
  4. library(survminer)
  1. expr <- read.table('../TCGA-HNSC.htseq_counts.tsv',header = T,sep='\t')
  2. dim(expr)
  3. # [1] 60488 547
  4. tail(expr[,1])
  5. # [1] "ENSGR0000281849.1" "__no_feature" "__ambiguous"
  6. # [4] "__too_low_aQual" "__not_aligned" "__alignment_not_unique"
  7. expr <- expr[1:(nrow(expr)-5),]
  8. class(expr[,1])
  9. # [1] "factor"
  10. expr[,1] <- as.character(expr[,1])
  11. for (i in 1:nrow(expr)) {
  12. expr[i,1] <- strsplit(expr[i,1],"\\.")[[1]][1]
  13. }
  14. symb <- AnnotationDbi::select(org.Hs.eg.db,expr[,1],"SYMBOL","ENSEMBL")
  15. symbtrim <- symb[match(unique(symb$SYMBOL),symb$SYMBOL),] %>% na.omit()
  16. row.names(expr) <- expr[,1]
  17. expr <- expr[,-1]
  18. expr <- expr[symbtrim$ENSEMBL,]
  19. row.names(expr) <- symbtrim$SYMBOL
  1. expr <- log2(edgeR::cpm(expr)+1)
  2. kp <- substring(colnames(expr),14,15)=='01';table(kp)
  3. # kp
  4. # FALSE TRUE
  5. # 46 500
  6. table(substring(colnames(expr),14,15))
  7. #
  8. # 01 06 11
  9. # 500 2 44
  10. expr <- expr[,kp]
  11. dim(expr)
  12. # [1] 25483 500
  13. save(expr, file = "HNSC_expr.Rdata")

文章中的样本量是497,现在取500

The differential abundance of the expression of radiotherapy-associated genes was further tested in a total of 497 HNSC samples downloaded from TCGA.

读入临床信息

这次发现一个问题

TCGA-HNSC Cox & KM plot - 图2

  1. ## 上次并没有读取完整
  2. clin <- read.table('./TCGA-HNSC.GDC_phenotype.tsv/TCGA-HNSC.GDC_phenotype.tsv',header = T,sep='\t')
  3. dim(clin)
  4. # [1] 306 139
  5. clin <- read.table('./TCGA-HNSC.GDC_phenotype.tsv/TCGA-HNSC.GDC_phenotype.tsv',header = T,sep='\t',fill = T,quote = "")
  6. dim(clin)
  7. # [1] 612 139
  1. clin$submitter_id.samples = gsub('-','.',clin$submitter_id.samples)
  2. row.names(clin) <- clin[,1]
  3. phe <- clin[substring(clin$submitter_id.samples,14,15)=='01',]
  4. pid <- intersect(colnames(expr),rownames(phe))
  5. expr <- expr[,pid]
  6. phe <- phe[pid,]
  7. phe <- phe[,c("race.demographic","age_at_initial_pathologic_diagnosis","clinical_stage","radiation_therapy")]
  1. surv <- read.table('../TCGA-HNSC.survival.tsv',header = T,sep='\t')
  2. surv$sample = gsub('-','.',surv$sample)
  3. row.names(surv) <- surv[,1]
  4. surv <- surv[pid,]
  5. phe <- cbind(phe,surv[,c("OS.time","OS")])
  6. colnames(phe) <- c("race","age","stage","radiation","time","event")
  7. save(phe, file = "HNSC_phe.Rdata")

Cox regression

  1. signature <- expr[c("CHAC2","CLEC9A","GNG10","JCHAIN","KLRB1",
  2. "NOG","OLR1","PRELID2","SYT1","VWCE","ZNF443"),]
  3. mySurv=with(phe,Surv(time, event))

univariate cox regression

  1. unicox <-apply(signature, 1 ,function(gene){
  2. group=ifelse(gene>median(gene),'high','low')
  3. survival_dat <- data.frame(group=group,stage=phe$stage,age=phe$age,race=phe$race,
  4. radiation=phe$radiation,stringsAsFactors = F)
  5. m=coxph(mySurv ~group, data = survival_dat)
  6. beta <- coef(m)
  7. se <- sqrt(diag(vcov(m)))
  8. HR <- exp(beta)
  9. HRse <- HR * se
  10. tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1),
  11. HR = HR, HRse = HRse,
  12. HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1),
  13. HRCILL = exp(beta - qnorm(.975, 0, 1) * se),
  14. HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3)
  15. return(tmp['grouplow',])
  16. })
  17. unicox=t(unicox)
  18. table(unicox[,4]<0.05)
  19. #
  20. # FALSE TRUE
  21. # 3 8

TCGA-HNSC Cox & KM plot - 图3

multivariate cox regression

  1. multicox <-apply(signature, 1 ,function(gene){
  2. group=ifelse(gene>median(gene),'high','low')
  3. survival_dat <- data.frame(group=group,stage=phe$stage,age=phe$age,race=phe$race,
  4. radiation=phe$radiation,stringsAsFactors = F)
  5. m=coxph(mySurv ~ age + stage+ group+radiation+race, data = survival_dat)
  6. beta <- coef(m)
  7. se <- sqrt(diag(vcov(m)))
  8. HR <- exp(beta)
  9. HRse <- HR * se
  10. tmp <- round(cbind(coef = beta, se = se, z = beta/se, p = 1 - pchisq((beta/se)^2, 1),
  11. HR = HR, HRse = HRse,
  12. HRz = (HR - 1) / HRse, HRp = 1 - pchisq(((HR - 1)/HRse)^2, 1),
  13. HRCILL = exp(beta - qnorm(.975, 0, 1) * se),
  14. HRCIUL = exp(beta + qnorm(.975, 0, 1) * se)), 3)
  15. return(tmp['grouplow',])
  16. })
  17. multicox=t(multicox)
  18. table(multicox[,4]<0.05)
  19. #
  20. # FALSE TRUE
  21. # 2 9

TCGA-HNSC Cox & KM plot - 图4

KM Plot

  1. for (i in 1:nrow(signature)) {
  2. gene=as.numeric(signature[i,])
  3. phe$group=ifelse(gene>median(gene),'high','low')
  4. fit <- survfit(Surv(time, event) ~ group,
  5. data = phe)
  6. survp=ggsurvplot(
  7. fit,
  8. pval = TRUE,
  9. title=row.names(signature)[i]
  10. )
  11. ggsave(filename = paste0('TCGA_gene_',i,'.png'),
  12. print(survp))
  13. }

022103104581_0TCGA_gene_1.png