library(dplyr)library(tibble)library(ggplot2)library(survival)library(survminer)library(data.table)library(stringr)
load phenotype
survival_data <- fread("../../Result/phenotype/common_survival_data.tsv")
clinical parameters
get_clinical <- function(x){ dat_clin <- x # pathologic_T dat_clin$Te1 <- str_extract(dat_clin$TNM, "T\\d[a-z]+") dat_clin$Te2 <- str_extract(dat_clin$TNM, "T\\d") dat_clin$Te1 <- with(dat_clin, ifelse(is.na(Te1), Te2, Te1)) # table(dat_clin$Te1) # table(dat_clin$Te2) dat_clin$Te2[dat_clin$Te2 == "T1"] <- "T1-T2" dat_clin$Te2[dat_clin$Te2 == "T2"] <- "T1-T2" dat_clin$Te2[dat_clin$Te2 == "T3"] <- "T1-T2" dat_clin$Te2[dat_clin$Te2 == "T4"] <- "T3-T4" Te2 <- table(dat_clin$Te2) # pathologic_N dat_clin$N <- str_extract(dat_clin$TNM, "N\\d") dat_clin$N <- str_extract(dat_clin$N, "\\d") # table(dat_clin$N) dat_clin$N[dat_clin$N == "0"] <- "NO" dat_clin$N[dat_clin$N == "1"] <- "Yes" dat_clin$N[dat_clin$N == "2"] <- "Yes" dat_clin$N[dat_clin$N == "3"] <- "Yes" N <- table(dat_clin$N) # pathologic_M dat_clin$M <- str_extract(dat_clin$TNM, "M\\d") dat_clin$M <- str_extract(dat_clin$M, "\\d") # table(dat_clin$M) dat_clin$M[dat_clin$M == "0"] <- "NO" dat_clin$M[dat_clin$M == "1"] <- "Yes" M <- table(dat_clin$M) # Age dat_clin$Age <- ifelse(dat_clin$Age > 60, "old", "young") Age <- table(dat_clin$Age) # Gender dat_clin$Gender <- ifelse(dat_clin$Gender == "MALE", "Male", "Female") Gender <- table(dat_clin$Gender) # stage dat_clin$Stage1 <- str_trim(str_extract(dat_clin$Stage, "\\s[H-Z]+"), side = c("both", "left", "right")) # table(dat_clin$Stage1) dat_clin$Stage1[dat_clin$Stage1 == "I"] <- "Stage I-II" dat_clin$Stage1[dat_clin$Stage1 == "II"] <- "Stage I-II" dat_clin$Stage1[dat_clin$Stage1 == "III"] <- "Stage III-IV" dat_clin$Stage1[dat_clin$Stage1 == "IV"] <- "Stage III-IV" Stage <- table(dat_clin$Stage1) dat_cal <- data.frame(Clinicopathological=c( rep("Age", length(Age)), rep("Gender", length(Gender)), rep("Stage", length(Stage)), rep("T", length(Te2)), rep("N", length(N)), rep("M", length(M))), Name = c(names(Age), names(Gender), names(Stage), names(Te2), names(N), names(M)), Number = c(as.numeric(Age), as.numeric(Gender), as.numeric(Stage), as.numeric(Te2), as.numeric(N), as.numeric(M)) ) return(dat_cal) }DT::datatable(get_clinical(survival_data))
curation
get_plot <- function(dat=survival_data, tag="Gender"){ # dat=survival_data # tag="Gender" # pvalue colnames(dat)[which(colnames(dat) == tag)] <- "group_info" dat$group_info <- factor(dat$group_info) factors <- unique(as.character(dat$group_info)) cox <- coxph(Surv(OS.Time, OS) ~ group_info, data = dat) tmp <- summary(cox) tmp.wald <- data.frame(t(tmp$waldtest)) %>% setNames(c("Wald_test", "Wald_df", "Wald_pvlaue")) tmp.lg <- data.frame(t(tmp$logtest)) %>% setNames(c("lg_rank", "lg_rank_df", "lg_rank_pvlaue")) tmp.total <- cbind(tmp.wald, tmp.lg) pvalue <- paste(paste0("Log-Rank P=", signif(tmp.lg$lg_rank_pvlaue, 3)), paste0("Cox P=", signif(tmp.wald$Wald_pvlaue, 3)), sep = "\n") # plot fit <- survfit(Surv(OS.Time, OS) ~ group_info, data = dat) info <- data.frame(time = fit$time, n.risk = fit$n.risk, n.event = fit$n.event, n.censor = fit$n.censor, surv = fit$surv, upper = fit$upper, lower = fit$lower) pl <- ggsurvplot(fit, data = dat, surv.median.line = "hv", add.all = TRUE, palette = "aaas", risk.table = TRUE, xlab = "Follow up time(Years)", legend = c(0.8, 0.2), legend.title = "", legend.labs = c("all", factors), break.x.by = 2, font.legend = c(10, "italic"), ggtheme = theme_bw()) pl$plot <- pl$plot + annotate("text", x=3, y=0.2, label=pvalue) res <- list(info=info, pl=pl) return(res)}
plot
# gender gender_plot <- get_plot(tag = "Gender")gender_plot$pl# stage stage_plot <- get_plot(tag = "Stage")stage_plot$pl
version
sessionInfo()
R version 4.0.2 (2020-06-22)Platform: x86_64-w64-mingw32/x64 (64-bit)Running under: Windows 10 x64 (build 19042)Matrix products: defaultlocale:[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 [3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C [5] LC_TIME=English_United States.1252 system code page: 936attached base packages:[1] stats graphics grDevices utils datasets methods base other attached packages:[1] stringr_1.4.0 impute_1.62.0 data.table_1.13.2 survminer_0.4.8 ggpubr_0.4.0 [6] survival_3.2-7 ggplot2_3.3.2 tibble_3.0.3 dplyr_1.0.2 loaded via a namespace (and not attached): [1] Rcpp_1.0.5 lattice_0.20-41 tidyr_1.1.2 zoo_1.8-8 digest_0.6.25 [6] R6_2.5.0 cellranger_1.1.0 plyr_1.8.6 backports_1.1.10 evaluate_0.14 [11] pillar_1.4.6 rlang_0.4.7 curl_4.3 readxl_1.3.1 rstudioapi_0.11 [16] car_3.0-10 Matrix_1.2-18 DT_0.16 rmarkdown_2.5 splines_4.0.2 [21] foreign_0.8-80 htmlwidgets_1.5.2 munsell_0.5.0 broom_0.7.2 compiler_4.0.2 [26] xfun_0.19 pkgconfig_2.0.3 htmltools_0.5.0 tidyselect_1.1.0 gridExtra_2.3 [31] km.ci_0.5-2 rio_0.5.16 reshape_0.8.8 crayon_1.3.4 withr_2.3.0 [36] grid_4.0.2 jsonlite_1.7.1 xtable_1.8-4 gtable_0.3.0 lifecycle_0.2.0 [41] magrittr_1.5 KMsurv_0.1-5 scales_1.1.1 zip_2.1.1 stringi_1.5.3 [46] carData_3.0-4 ggsignif_0.6.0 ellipsis_0.3.1 survMisc_0.5.5 generics_0.1.0 [51] vctrs_0.3.4 openxlsx_4.2.2 tools_4.0.2 forcats_0.5.0 glue_1.4.2 [56] purrr_0.3.4 hms_0.5.3 crosstalk_1.1.0.1 rsconnect_0.8.16 abind_1.4-5 [61] yaml_2.2.1 colorspace_1.4-1 rstatix_0.6.0 knitr_1.30 haven_2.3.1
Reference
- 解决生存分析和临床参数相关分析