signature 生存分析 p<0.05 的癌症:ACC、BLCA、KIRP、LGG、LUAD、MESO、SARC、THYM

PCA 将样本分成四组(按前两个主成分 PC1、PC2 坐标的正负号组合分组)

  # Pull the 13 signature genes out of the expression matrix and transpose the
  # result, so that each row is a sample and each column is a gene.
  sigs <- t(expr[
    c(
      "CYP4B1", "ADH1B", "IL33", "CD1D", "CR1", "CABCOCO1",
      "TCEAL2", "ID1", "CDC45", "ANLN", "CDCA2",
      "PRC1", "TOP2A"
    ),
  ])

一套流程(以 TCGA-DLBC 为例)

  ### expr
  # Build a gene-symbol x sample matrix of log2(CPM + 1) values from the
  # count table. The first column of the file holds the gene identifiers.
  # (The workspace is intentionally NOT cleared with rm(list = ls()); start a
  # fresh R session instead if a clean environment is needed.)
  expr <- read.table(
    "TCGA-DLBC.htseq_counts.tsv/TCGA-DLBC.htseq_counts.tsv",
    header = TRUE, sep = "\t"
  )
  tail(expr[, 1])
  # Drop the last five rows; inspect the tail() output above to confirm they
  # are summary rows and not genes.
  expr <- head(expr, -5)
  # Strip everything from the first "." onwards (the version suffix) in one
  # vectorised call instead of looping over rows.
  expr[, 1] <- sub("\\..*$", "", as.character(expr[, 1]))
  # Keep the first row per stripped ID so the row.names<- call below cannot
  # fail on duplicates; this is a no-op when the IDs are already unique.
  expr <- expr[!duplicated(expr[, 1]), ]
  # Map ENSEMBL IDs to gene symbols. The annotation object is referenced with
  # its namespace so no prior library(org.Hs.eg.db) call is required.
  symb <- AnnotationDbi::select(
    org.Hs.eg.db::org.Hs.eg.db, expr[, 1], "SYMBOL", "ENSEMBL"
  )
  # Keep the first mapping for every symbol and drop unmapped (NA) entries.
  symbtrim <- na.omit(symb[match(unique(symb$SYMBOL), symb$SYMBOL), ])
  row.names(expr) <- expr[, 1]
  expr <- expr[, -1]
  expr <- expr[symbtrim$ENSEMBL, ]
  row.names(expr) <- symbtrim$SYMBOL
  # NOTE(review): edgeR::cpm() expects raw counts. If this file is already
  # log2(count + 1) transformed (as some public hubs distribute it), the
  # values should be back-transformed with 2^x - 1 first -- confirm the unit.
  expr <- log2(edgeR::cpm(expr) + 1)
  # kp <- substring(colnames(expr),14,15)=='01';table(kp)
  # expr <- expr[,kp]
  dim(expr)
  ### phe
  # Load the survival table and align it with the expression matrix so that
  # both contain the same samples in the same order.
  phe <- read.table(
    "./TCGA-DLBC.survival.tsv/TCGA-DLBC.survival.tsv",
    header = TRUE, sep = "\t", fill = TRUE, quote = ""
  )
  # Replace "-" with "." in the sample IDs so they can be matched against
  # colnames(expr); fixed = TRUE treats "-" as a literal, not a regex.
  phe$sample <- gsub("-", ".", phe$sample, fixed = TRUE)
  row.names(phe) <- phe[, 1]
  # phe <- phe[substring(phe$sample,14,15)=='01',]
  id <- intersect(colnames(expr), rownames(phe))
  # Fail early if the two tables share no samples (e.g. ID format mismatch).
  stopifnot(length(id) > 0)
  # drop = FALSE keeps expr two-dimensional even if only one sample matches.
  expr <- expr[, id, drop = FALSE]
  phe <- phe[id, ]
  dim(phe)
  dim(expr)
  # 13-gene signature used for the PCA.
  signature_genes <- c(
    "CYP4B1", "ADH1B", "IL33", "CD1D", "CR1", "CABCOCO1",
    "TCEAL2", "ID1", "CDC45", "ANLN", "CDCA2",
    "PRC1", "TOP2A"
  )
  # Report missing genes by name instead of failing with a bare subscript
  # error (matrix) or silently producing NA rows (data frame).
  missing_genes <- setdiff(signature_genes, rownames(expr))
  if (length(missing_genes) > 0) {
    stop(
      "Signature genes missing from expr: ",
      paste(missing_genes, collapse = ", "),
      call. = FALSE
    )
  }
  sigs <- t(expr[signature_genes, , drop = FALSE])
  ## Without the transpose, PCA would group by the 13 genes instead of by
  ## the samples.
  library("FactoMineR")
  library("factoextra")
  dat.pca <- PCA(sigs, graph = FALSE)
  fviz_pca_ind(dat.pca, geom.ind = "point")
  # The flags below are assigned to phe by position, so the PCA individuals
  # must be in exactly the same order as the rows of phe.
  stopifnot(identical(rownames(dat.pca$ind$coord), rownames(phe)))
  # Cross-tabulate the signs of the first two principal components.
  table(as.data.frame(dat.pca$ind$coord[, 1:2] > 0))
  phe$pc1 <- dat.pca$ind$coord[, 1] > 0
  phe$pc2 <- dat.pca$ind$coord[, 2] > 0
  # Four groups: "TRUETRUE", "TRUEFALSE", "FALSETRUE", "FALSEFALSE".
  phe$gene <- paste0(phe$pc1, phe$pc2)
  library(survival)
  library(survminer)
  # Kaplan-Meier curves for overall survival, one curve per value of
  # phe$gene.
  fit <- survfit(Surv(OS.time, OS) ~ gene, data = phe)
  ggsurvplot(
    fit,                          # survfit object with calculated statistics.
    risk.table = TRUE,            # show risk table.
    pval = TRUE,                  # show p-value of log-rank test.
    conf.int = TRUE,              # show confidence intervals for
                                  # point estimates of survival curves.
    # NOTE(review): the unit of OS.time is not visible here; if it is days,
    # 200 limits the plot to roughly the first half year -- confirm.
    xlim = c(0, 200),             # present narrower X axis, but not affect
                                  # survival estimates.
    risk.table.y.text.col = TRUE, # colour risk table text annotations.
    risk.table.y.text = FALSE     # show bars instead of names in text
                                  # annotations in legend of risk table.
  )

Ovarian Cancer (OV)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 89 88
  5. # TRUE 76 121

13-gene signature - 图1

13-gene signature - 图2

Head and Neck Cancer (HNSC)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 105 148
  5. # TRUE 115 132

13-gene signature - 图3

13-gene signature - 图4

Acute Myeloid Leukemia (LAML)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 29 32
  5. # TRUE 30 41

13-gene signature - 图5

13-gene signature - 图6

Adrenocortical Cancer (ACC) (p=0.0014)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 15 21
  5. # TRUE 22 21

13-gene signature - 图7

13-gene signature - 图8

Bile Duct Cancer (CHOL)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 8 6
  5. # TRUE 17 14

13-gene signature - 图9

13-gene signature - 图10

Bladder Cancer (BLCA) (p=0.034)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 83 104
  5. # TRUE 122 115

13-gene signature - 图11

13-gene signature - 图12

Breast Cancer (BRCA)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 253 274
  5. # TRUE 285 382

13-gene signature - 图13

13-gene signature - 图14

Cervical Cancer (CESC)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 54 85
  5. # TRUE 81 76

13-gene signature - 图15

13-gene signature - 图16

Colon Cancer (COAD)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 93 122
  5. # TRUE 134 138

13-gene signature - 图17

13-gene signature - 图18

Endometrioid Cancer (UCEC)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 88 149
  5. # TRUE 163 167

13-gene signature - 图19

13-gene signature - 图20

Esophageal Cancer (ESCA)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 31 44
  5. # TRUE 50 47

13-gene signature - 图21

13-gene signature - 图22

Glioblastoma (GBM)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 31 43
  5. # TRUE 41 52

13-gene signature - 图23

13-gene signature - 图24

Kidney Chromophobe (KICH) (14 datasets)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 23 22
  5. # TRUE 18 24

13-gene signature - 图25

13-gene signature - 图26

Kidney Clear Cell Carcinoma (KIRC)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 97 112
  5. # TRUE 159 234

13-gene signature - 图27

13-gene signature - 图28

Kidney Papillary Cell Carcinoma (KIRP) (p=4e-04)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 69 88
  5. # TRUE 74 87

13-gene signature - 图29

13-gene signature - 图30

Large B-cell Lymphoma (DLBC)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 11 11
  5. # TRUE 13 12

13-gene signature - 图31

13-gene signature - 图32

Liver Cancer (LIHC)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 62 103
  5. # TRUE 123 130

13-gene signature - 图33

13-gene signature - 图34

Lower Grade Glioma (LGG) (p<0.0001)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 118 158
  5. # TRUE 117 131

13-gene signature - 图35
13-gene signature - 图36

Lung Adenocarcinoma (LUAD) (p=0.0026)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 133 133
  5. # TRUE 126 180

13-gene signature - 图37

13-gene signature - 图38

Lung Squamous Cell Carcinoma (LUSC)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 69 136
  5. # TRUE 150 187

13-gene signature - 图39

13-gene signature - 图40

Melanoma (SKCM)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 94 128
  5. # TRUE 106 130

13-gene signature - 图41

13-gene signature - 图42

SKCM 的生存曲线图不知道为什么不显示 p 值…

Mesothelioma (MESO) (p<0.0001)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 16 19
  5. # TRUE 23 26

13-gene signature - 图43

13-gene signature - 图44

Ocular melanomas (UVM)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 13 24
  5. # TRUE 20 23

13-gene signature - 图45

13-gene signature - 图46

Pancreatic Cancer (PAAD)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 25 60
  5. # TRUE 53 43

13-gene signature - 图47

13-gene signature - 图48

Pheochromocytoma & Paraganglioma (PCPG)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 46 43
  5. # TRUE 43 54

13-gene signature - 图49

13-gene signature - 图50

Prostate Cancer (PRAD)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 125 152
  5. # TRUE 123 151

13-gene signature - 图51

13-gene signature - 图52

Rectal Cancer (READ)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 29 46
  5. # TRUE 43 49

13-gene signature - 图53

13-gene signature - 图54

Sarcoma (SARC) (p=0.0073)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 42 69
  5. # TRUE 73 78

13-gene signature - 图55

13-gene signature - 图56

Stomach Cancer (STAD)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 63 117
  5. # TRUE 108 93

13-gene signature - 图57

13-gene signature - 图58

Testicular Cancer (TGCT)(注意:下表数字与上面 STAD 的结果完全相同(63/117/108/93),疑似复制粘贴错误,需重新运行确认)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 63 117
  5. # TRUE 108 93

13-gene signature - 图59

13-gene signature - 图60

Thymoma (THYM) (p=0.023)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 25 26
  5. # TRUE 36 33

13-gene signature - 图61

13-gene signature - 图62

Thyroid Cancer (THCA)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 108 139
  5. # TRUE 147 173

13-gene signature - 图63

13-gene signature - 图64

Uterine Carcinosarcoma (UCS)

  1. table(as.data.frame(dat.pca$ind$coord[,1:2]>0))
  2. # Dim.2
  3. # Dim.1 FALSE TRUE
  4. # FALSE 17 13
  5. # TRUE 11 13

13-gene signature - 图65

13-gene signature - 图66