iCluster分析是基于多组学数据的整合分类算法。更多知识分享请到 https://zouhua.top/

Rscript

  1. #!/usr/bin/R
  2. library(argparser)
  3. library(dplyr)
  4. library(tibble)
  5. library(ggplot2)
  6. library(data.table)
  7. library(iCluster)
  8. rm(list = ls())
  9. options(stringsAsFactors = F)
  10. options(future.globals.maxSize = 1000 * 1024^2)
  11. grp <- c("S1", "S2")
  12. grp.col <- c("#6C326C", "#77A2D1")
  13. # parameter input
  14. parser <- arg_parser("iCluster function") %>%
  15. add_argument("-p", "--phen",
  16. help = "phenotype") %>%
  17. add_argument("-c", "--copyNumber",
  18. help = "copyNumber matrix") %>%
  19. add_argument("-g", "--geneExp",
  20. help = "gene Expression matrix") %>%
  21. add_argument("-m", "--methylation",
  22. help = "DNA methylation matrix") %>%
  23. add_argument("-r", "--RPPA",
  24. help = "protein_RPPA") %>%
  25. add_argument("-o", "--out",
  26. help = "result with director", default = "./")
  27. args <- parse_args(parser)
  28. # prepare for function
  29. phen <- fread(args$p)
  30. copyNumber <- fread(args$c)
  31. geneExp <- fread(args$g)
  32. methylation <- fread(args$m)
  33. protein_RPPA <- fread(args$r)
  34. out <- args$o
  35. get_profile <- function(dataset = copyNumber,
  36. metadata = phen,
  37. tag = "copyNumber"){
  38. # dataset = protein_RPPA
  39. # metadata = phen
  40. # tag = "protein_RPPA"
  41. sid <- intersect(phen$Barcode, colnames(dataset))
  42. res <- dataset %>% dplyr::select(c("V1", sid)) %>%
  43. mutate(Type=tag) %>%
  44. mutate(Name=paste(V1, Type, sep = "_")) %>%
  45. dplyr::select(Name, V1, Type, everything()) %>%
  46. dplyr::select(-c("V1", "Type")) %>%
  47. column_to_rownames("Name") %>%
  48. t()
  49. return(res)
  50. }
  51. datasets <- list(copyNumber=get_profile(dataset = copyNumber, tag = "copyNumber"),
  52. geneExp=get_profile(dataset = geneExp, tag = "geneExp"),
  53. methylation=get_profile(dataset = methylation, tag = "methylation"),
  54. protein_RPPA=get_profile(dataset = protein_RPPA, tag = "protein_RPPA"))
  55. print(names(datasets))
  56. # icluster
  57. fit <- iCluster(datasets = datasets, k=2, lambda=rep(0.2, 4), max.iter = 50, epsilon = 1e-3)
  58. #plotiCluster(fit=fit, label=rownames(datasets[[2]]))
  59. #compute.pod(fit)
  60. phen_new_icluster <- inner_join(phen,
  61. data.frame(SampleID=rownames(datasets[[2]]), Cluster=fit$clusters),
  62. by = c("Barcode"="SampleID")) %>%
  63. dplyr::select(Barcode, Cluster, everything()) %>%
  64. mutate(Cluster=paste0("S", Cluster))
  65. name1 <- paste0(out, "phenotype_cluster_iCluster.csv")
  66. write.csv(phen_new_icluster, file = name1, row.names = F)
  67. fit2 <- iCluster2(datasets = datasets, k=2, lambda=list(0.2, 0.2, 0.2, 0.2),
  68. max.iter = 50, verbose = TRUE)
  69. if(0){
  70. phen_new_icluster2 <- inner_join(phen,
  71. data.frame(SampleID=rownames(datasets[[2]]), Cluster=fit2$clusters),
  72. by = c("Barcode"="SampleID")) %>%
  73. dplyr::select(Barcode, Cluster, everything()) %>%
  74. mutate(Cluster=paste0("S", Cluster))
  75. name2 <- paste(out, "phenotype_cluster_iCluster2.csv")
  76. write.csv(phen_new_icluster2, file = name2, row.names = F)
  77. }
  78. save(fit, fit2, file = "iCluster_fit.RData")

Run

Rscript iCluster.R -p ../../Result/phenotype/common_survival_data.tsv \
 -c ../../Result/profile/copyNumber_filter.tsv \
 -g ../../Result/profile/geneExp_filter.tsv \
 -m ../../Result/profile/methylation_filter.tsv \
 -r ../../Result/profile/protein_RPPA_filter.tsv \
 -o ./

Reference

  1. iCluster package

参考文章如引起任何侵权问题,可以与我联系,谢谢。