1. library(dplyr)
  2. library(tibble)
  3. library(ggplot2)
  4. library(survival)
  5. library(survminer)
  6. library(data.table)
  7. library(stringr)

load phenotype

  1. survival_data <- fread("../../Result/phenotype/common_survival_data.tsv")

clinical parameters

  1. get_clinical <- function(x){
  2. dat_clin <- x
  3. # pathologic_T
  4. dat_clin$Te1 <- str_extract(dat_clin$TNM, "T\\d[a-z]+")
  5. dat_clin$Te2 <- str_extract(dat_clin$TNM, "T\\d")
  6. dat_clin$Te1 <- with(dat_clin, ifelse(is.na(Te1), Te2, Te1))
  7. # table(dat_clin$Te1)
  8. # table(dat_clin$Te2)
  9. dat_clin$Te2[dat_clin$Te2 == "T1"] <- "T1-T2"
  10. dat_clin$Te2[dat_clin$Te2 == "T2"] <- "T1-T2"
  11. dat_clin$Te2[dat_clin$Te2 == "T3"] <- "T1-T2"
  12. dat_clin$Te2[dat_clin$Te2 == "T4"] <- "T3-T4"
  13. Te2 <- table(dat_clin$Te2)
  14. # pathologic_N
  15. dat_clin$N <- str_extract(dat_clin$TNM, "N\\d")
  16. dat_clin$N <- str_extract(dat_clin$N, "\\d")
  17. # table(dat_clin$N)
  18. dat_clin$N[dat_clin$N == "0"] <- "NO"
  19. dat_clin$N[dat_clin$N == "1"] <- "Yes"
  20. dat_clin$N[dat_clin$N == "2"] <- "Yes"
  21. dat_clin$N[dat_clin$N == "3"] <- "Yes"
  22. N <- table(dat_clin$N)
  23. # pathologic_M
  24. dat_clin$M <- str_extract(dat_clin$TNM, "M\\d")
  25. dat_clin$M <- str_extract(dat_clin$M, "\\d")
  26. # table(dat_clin$M)
  27. dat_clin$M[dat_clin$M == "0"] <- "NO"
  28. dat_clin$M[dat_clin$M == "1"] <- "Yes"
  29. M <- table(dat_clin$M)
  30. # Age
  31. dat_clin$Age <- ifelse(dat_clin$Age > 60, "old", "young")
  32. Age <- table(dat_clin$Age)
  33. # Gender
  34. dat_clin$Gender <- ifelse(dat_clin$Gender == "MALE", "Male", "Female")
  35. Gender <- table(dat_clin$Gender)
  36. # stage
  37. dat_clin$Stage1 <- str_trim(str_extract(dat_clin$Stage, "\\s[H-Z]+"),
  38. side = c("both", "left", "right"))
  39. # table(dat_clin$Stage1)
  40. dat_clin$Stage1[dat_clin$Stage1 == "I"] <- "Stage I-II"
  41. dat_clin$Stage1[dat_clin$Stage1 == "II"] <- "Stage I-II"
  42. dat_clin$Stage1[dat_clin$Stage1 == "III"] <- "Stage III-IV"
  43. dat_clin$Stage1[dat_clin$Stage1 == "IV"] <- "Stage III-IV"
  44. Stage <- table(dat_clin$Stage1)
  45. dat_cal <- data.frame(Clinicopathological=c(
  46. rep("Age", length(Age)),
  47. rep("Gender", length(Gender)),
  48. rep("Stage", length(Stage)),
  49. rep("T", length(Te2)),
  50. rep("N", length(N)),
  51. rep("M", length(M))),
  52. Name = c(names(Age), names(Gender),
  53. names(Stage), names(Te2),
  54. names(N), names(M)),
  55. Number = c(as.numeric(Age), as.numeric(Gender),
  56. as.numeric(Stage), as.numeric(Te2),
  57. as.numeric(N), as.numeric(M))
  58. )
  59. return(dat_cal)
  60. }
  61. DT::datatable(get_clinical(survival_data))

curation

  1. get_plot <- function(dat=survival_data, tag="Gender"){
  2. # dat=survival_data
  3. # tag="Gender"
  4. # pvalue
  5. colnames(dat)[which(colnames(dat) == tag)] <- "group_info"
  6. dat$group_info <- factor(dat$group_info)
  7. factors <- unique(as.character(dat$group_info))
  8. cox <- coxph(Surv(OS.Time, OS) ~ group_info, data = dat)
  9. tmp <- summary(cox)
  10. tmp.wald <- data.frame(t(tmp$waldtest)) %>%
  11. setNames(c("Wald_test", "Wald_df", "Wald_pvlaue"))
  12. tmp.lg <- data.frame(t(tmp$logtest)) %>%
  13. setNames(c("lg_rank", "lg_rank_df", "lg_rank_pvlaue"))
  14. tmp.total <- cbind(tmp.wald, tmp.lg)
  15. pvalue <- paste(paste0("Log-Rank P=", signif(tmp.lg$lg_rank_pvlaue, 3)),
  16. paste0("Cox P=", signif(tmp.wald$Wald_pvlaue, 3)), sep = "\n")
  17. # plot
  18. fit <- survfit(Surv(OS.Time, OS) ~ group_info, data = dat)
  19. info <- data.frame(time = fit$time,
  20. n.risk = fit$n.risk,
  21. n.event = fit$n.event,
  22. n.censor = fit$n.censor,
  23. surv = fit$surv,
  24. upper = fit$upper,
  25. lower = fit$lower)
  26. pl <- ggsurvplot(fit,
  27. data = dat,
  28. surv.median.line = "hv",
  29. add.all = TRUE,
  30. palette = "aaas",
  31. risk.table = TRUE,
  32. xlab = "Follow up time(Years)",
  33. legend = c(0.8, 0.2),
  34. legend.title = "",
  35. legend.labs = c("all", factors),
  36. break.x.by = 2,
  37. font.legend = c(10, "italic"),
  38. ggtheme = theme_bw())
  39. pl$plot <- pl$plot +
  40. annotate("text", x=3, y=0.2, label=pvalue)
  41. res <- list(info=info, pl=pl)
  42. return(res)
  43. }

plot

  1. # gender
  2. gender_plot <- get_plot(tag = "Gender")
  3. gender_plot$pl
  4. # stage
  5. stage_plot <- get_plot(tag = "Stage")
  6. stage_plot$pl

version

  1. sessionInfo()
  1. R version 4.0.2 (2020-06-22)
  2. Platform: x86_64-w64-mingw32/x64 (64-bit)
  3. Running under: Windows 10 x64 (build 19042)
  4. Matrix products: default
  5. locale:
  6. [1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
  7. [3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
  8. [5] LC_TIME=English_United States.1252
  9. system code page: 936
  10. attached base packages:
  11. [1] stats graphics grDevices utils datasets methods base
  12. other attached packages:
  13. [1] stringr_1.4.0 impute_1.62.0 data.table_1.13.2 survminer_0.4.8 ggpubr_0.4.0
  14. [6] survival_3.2-7 ggplot2_3.3.2 tibble_3.0.3 dplyr_1.0.2
  15. loaded via a namespace (and not attached):
  16. [1] Rcpp_1.0.5 lattice_0.20-41 tidyr_1.1.2 zoo_1.8-8 digest_0.6.25
  17. [6] R6_2.5.0 cellranger_1.1.0 plyr_1.8.6 backports_1.1.10 evaluate_0.14
  18. [11] pillar_1.4.6 rlang_0.4.7 curl_4.3 readxl_1.3.1 rstudioapi_0.11
  19. [16] car_3.0-10 Matrix_1.2-18 DT_0.16 rmarkdown_2.5 splines_4.0.2
  20. [21] foreign_0.8-80 htmlwidgets_1.5.2 munsell_0.5.0 broom_0.7.2 compiler_4.0.2
  21. [26] xfun_0.19 pkgconfig_2.0.3 htmltools_0.5.0 tidyselect_1.1.0 gridExtra_2.3
  22. [31] km.ci_0.5-2 rio_0.5.16 reshape_0.8.8 crayon_1.3.4 withr_2.3.0
  23. [36] grid_4.0.2 jsonlite_1.7.1 xtable_1.8-4 gtable_0.3.0 lifecycle_0.2.0
  24. [41] magrittr_1.5 KMsurv_0.1-5 scales_1.1.1 zip_2.1.1 stringi_1.5.3
  25. [46] carData_3.0-4 ggsignif_0.6.0 ellipsis_0.3.1 survMisc_0.5.5 generics_0.1.0
  26. [51] vctrs_0.3.4 openxlsx_4.2.2 tools_4.0.2 forcats_0.5.0 glue_1.4.2
  27. [56] purrr_0.3.4 hms_0.5.3 crosstalk_1.1.0.1 rsconnect_0.8.16 abind_1.4-5
  28. [61] yaml_2.2.1 colorspace_1.4-1 rstatix_0.6.0 knitr_1.30 haven_2.3.1

Reference

  1. 解决生存分析和临床参数相关分析