1. #理解inferCNV结果文件
    2. rm(list=ls())
    3. options(stringsAsFactors = F)
    4. library(phylogram)
    5. library(gridExtra)
    6. library(grid)
    7. require(dendextend)
    8. require(ggthemes)
    9. library(tidyverse)
    10. library(Seurat)
    11. library(infercnv)
    12. library(miscTools)
    13. # Import inferCNV dendrogram
    14. infercnv.dend <- read.dendrogram(file = "inferCNV_output/infercnv.observations_dendrogram.txt")
    15. # Cut tree
    16. infercnv.labels <- cutree(infercnv.dend,
    17. k = 6,
    18. order_clusters_as_data = FALSE)
    19. table(infercnv.labels)
    20. # Color labels
    21. the_bars <- as.data.frame(tableau_color_pal("Tableau 20")(20)[infercnv.labels])
    22. colnames(the_bars) <- "inferCNV_tree"
    23. the_bars$inferCNV_tree <- as.character(the_bars$inferCNV_tree)
    24. infercnv.dend %>% set("labels",rep("", nobs(infercnv.dend)) ) %>% plot(main="inferCNV dendrogram") %>%
    25. colored_bars(colors = as.data.frame(the_bars), dend = infercnv.dend, sort_by_labels_order = FALSE, add = T, y_scale=10, y_shift = 0)
    26. infercnv.labels=as.data.frame(infercnv.labels)
    27. groupFiles='groupFiles.txt'
    28. meta=read.table(groupFiles,sep = '\t')
    29. infercnv.labels$V1=rownames(infercnv.labels)
    30. meta=merge(meta,infercnv.labels,by='V1')
    31. table(meta[,2:3])
    32. if( ! file.exists( "cnv_scores.csv")){
    33. tmp=read.table("inferCNV_output/infercnv.references.txt", header=T)
    34. down=mean(rowMeans(tmp)) - 2 * mean( apply(tmp, 1, sd))
    35. up=mean(rowMeans(tmp)) + 2 * mean( apply(tmp, 1, sd))
    36. oneCopy=up-down
    37. oneCopy
    38. a1= down- 2*oneCopy
    39. a2= down- 1*oneCopy
    40. down;up
    41. a3= up + 1*oneCopy
    42. a4= up + 2*oneCopy
    43. cnv_table <- read.table("inferCNV_output/infercnv.observations.txt", header=T)
    44. # Score cells based on their CNV scores
    45. # Replicate the table
    46. cnv_score_table <- as.matrix(cnv_table)
    47. cnv_score_mat <- as.matrix(cnv_table)
    48. # Scoring
    49. cnv_score_table[cnv_score_mat > 0 & cnv_score_mat < a2] <- "A" #complete loss. 2pts
    50. cnv_score_table[cnv_score_mat >= a2 & cnv_score_mat < down] <- "B" #loss of one copy. 1pts
    51. cnv_score_table[cnv_score_mat >= down & cnv_score_mat < up ] <- "C" #Neutral. 0pts
    52. cnv_score_table[cnv_score_mat >= up & cnv_score_mat <= a3] <- "D" #addition of one copy. 1pts
    53. cnv_score_table[cnv_score_mat > a3 & cnv_score_mat <= a4 ] <- "E" #addition of two copies. 2pts
    54. cnv_score_table[cnv_score_mat > a4] <- "F" #addition of more than two copies. 2pts
    55. # Check
    56. table(cnv_score_table[,1])
    57. # Replace with score
    58. cnv_score_table_pts <- cnv_table
    59. rm(cnv_score_mat)
    60. #
    61. cnv_score_table_pts[cnv_score_table == "A"] <- 2
    62. cnv_score_table_pts[cnv_score_table == "B"] <- 1
    63. cnv_score_table_pts[cnv_score_table == "C"] <- 0
    64. cnv_score_table_pts[cnv_score_table == "D"] <- 1
    65. cnv_score_table_pts[cnv_score_table == "E"] <- 2
    66. cnv_score_table_pts[cnv_score_table == "F"] <- 2
    67. # Scores are stored in “cnv_score_table_pts”. Use colSums to add up scores for each cell and store as vector
    68. cell_scores_CNV <- as.data.frame(colSums(cnv_score_table_pts))
    69. colnames(cell_scores_CNV) <- "cnv_score"
    70. head(cell_scores_CNV)
    71. write.csv(x = cell_scores_CNV, file = "cnv_scores.csv")
    72. }
    73. # 除去了reference后的走inferCNV的细胞
    74. cell_scores_CNV=read.csv('cnv_scores.csv',row.names = 1)
    75. head(cell_scores_CNV)
    76. load(file = '../section-01-cluster/basic.sce.pbmc.Rdata')
    77. sce=pbmc
    78. phe=sce@meta.data
    79. phe$celltype=Idents(sce)
    80. head(rownames(phe))
    81. head(rownames(cell_scores_CNV))
    82. #rownames(phe)=paste0('X',rownames(phe))
    83. rownames(phe)=gsub('-','.',rownames(phe))
    84. head(rownames(phe))
    85. head(rownames(cell_scores_CNV))
    86. head(rownames(phe))
    87. phe=phe[rownames(phe) %in% rownames(cell_scores_CNV),]
    88. identical(rownames(phe),rownames(cell_scores_CNV))
    89. infercnv.labels <- cutree(infercnv.dend, k = 6, order_clusters_as_data = FALSE)
    90. phe$inferCNV= infercnv.labels[match(rownames(phe), names(infercnv.labels) )]
    91. phe$cnv_scores = cell_scores_CNV[rownames(phe),]
    92. table(phe$celltype,phe$inferCNV)
    93. head(rownames(phe))
    94. dim(phe)
    95. library(ggpubr)
    96. p1=ggboxplot(phe,'celltype','cnv_scores', fill = "celltype")
    97. p1= p1+ theme(axis.text.x = element_text(angle = 45,
    98. vjust = 0.5, hjust=0.5))
    99. p2=ggboxplot(phe,'inferCNV','cnv_scores', fill = "inferCNV")
    100. library(patchwork)
    101. p1+p2
    102. ggsave(filename = 'anno_CNVscore.pdf')
    103. table(phe$celltype,phe$inferCNV)
    104. #rownames(phe)=gsub('X','',rownames(phe))
    105. rownames(phe)=gsub('[.]','-',rownames(phe))
    106. head(rownames(phe))
    107. sce
    108. tail(rownames(sce@meta.data))
    109. head(rownames(phe))
    110. sce$celltype=Idents(sce)
    111. table(sce$celltype)
    112. sce=subset(sce,celltype %in% c('epithelial' ))
    113. sce
    114. kp=rownames(sce@meta.data) %in% rownames(phe)
    115. table(kp)
    116. sce=sce[,kp]
    117. phe=phe[rownames(sce@meta.data),]
    118. sce@meta.data=phe
    119. head(phe)
    120. save(sce, file = 'epi_sce_annoCNV.Rdata')