tpm rpkm

    1. # countMatrix is a matrix of read counts from featureCounts or other similar tools
    2. # geneLength is a data frame with first column as gene id and second column as gene length
    3. # TPM ---------------------------------------------------------------------
    4. # 20190505 | update on 20200817
    #' Convert raw read counts to TPM (transcripts per million)
    #'
    #' Each count is first divided by its gene length in kilobases; every
    #' sample (column) is then scaled so that its length-normalised values sum
    #' to one million.
    #'
    #' @param countMatrix Matrix (or data frame) of read counts with genes in
    #'   rows, samples in columns and gene IDs as row names.
    #' @param geneLength Table whose first column holds gene IDs and whose
    #'   second column holds gene lengths in bases. Rows may be in any order;
    #'   they are aligned to `rownames(countMatrix)`.
    #' @return An object shaped like `countMatrix` holding TPM values.
    TPM <- function(countMatrix, geneLength) {
      # Base R only: attaching tidyverse/data.table inside a function is a
      # global side effect and is not needed for two column look-ups.
      geneLength <- as.data.frame(geneLength, stringsAsFactors = FALSE)
      if (ncol(geneLength) < 2L) {
        stop("geneLength must have a gene ID column and a gene length column",
             call. = FALSE)
      }
      gene_ids <- geneLength[[1L]]
      gene_len <- geneLength[[2L]]
      if (!is.numeric(gene_len)) {
        stop("gene lengths (second column of geneLength) must be numeric",
             call. = FALSE)
      }

      count_ids <- rownames(countMatrix)
      if (is.null(count_ids)) {
        stop("countMatrix must have gene IDs as row names", call. = FALSE)
      }

      # Align gene lengths to the row order of the count matrix.
      idx <- match(count_ids, gene_ids)
      if (anyNA(idx)) {
        # A single NA length would propagate through colSums() and turn every
        # value of every sample into NA, so fail loudly instead.
        missing_ids <- count_ids[is.na(idx)]
        stop(
          length(missing_ids), " gene(s) in countMatrix have no entry in ",
          "geneLength, e.g. ", paste(utils::head(missing_ids, 5L), collapse = ", "),
          call. = FALSE
        )
      }
      length_kb <- gene_len[idx] / 1000

      # Reads per kilobase: the length vector recycles down the rows (genes).
      rate <- countMatrix / length_kb
      per_million <- colSums(rate) / 1e6

      # Return visibly (the previous version ended on an assignment).
      sweep(rate, 2, per_million, `/`)
    }
    18. # RPKM --------------------------------------------------------------------
    19. # 20190604
    #' Convert raw read counts to RPKM (reads per kilobase per million)
    #'
    #' Each sample (column) is first scaled by its total read count in
    #' millions; every gene (row) is then divided by its length in kilobases.
    #'
    #' @param countMatrix Matrix (or data frame) of read counts with genes in
    #'   rows, samples in columns and gene IDs as row names.
    #' @param geneLength Table whose first column holds gene IDs and whose
    #'   second column holds gene lengths in bases. Rows may be in any order;
    #'   they are aligned to `rownames(countMatrix)`.
    #' @return An object shaped like `countMatrix` holding RPKM values. Genes
    #'   without a length entry yield `NA` rows (with a warning).
    RPKM <- function(countMatrix, geneLength) {
      # Base R only: attaching tidyverse/data.table inside a function is a
      # global side effect and is not needed for two column look-ups.
      geneLength <- as.data.frame(geneLength, stringsAsFactors = FALSE)
      if (ncol(geneLength) < 2L) {
        stop("geneLength must have a gene ID column and a gene length column",
             call. = FALSE)
      }
      gene_ids <- geneLength[[1L]]
      gene_len <- geneLength[[2L]]
      if (!is.numeric(gene_len)) {
        stop("gene lengths (second column of geneLength) must be numeric",
             call. = FALSE)
      }

      count_ids <- rownames(countMatrix)
      if (is.null(count_ids)) {
        stop("countMatrix must have gene IDs as row names", call. = FALSE)
      }

      # Align gene lengths to the row order of the count matrix.
      idx <- match(count_ids, gene_ids)
      if (anyNA(idx)) {
        # Only the affected rows become NA here, so keep the partial result
        # but make the gap visible to the caller.
        missing_ids <- count_ids[is.na(idx)]
        warning(
          length(missing_ids), " gene(s) in countMatrix have no entry in ",
          "geneLength and will be NA, e.g. ",
          paste(utils::head(missing_ids, 5L), collapse = ", "),
          call. = FALSE
        )
      }
      length_kb <- gene_len[idx] / 1000

      # Library-size normalisation per sample, then length normalisation per
      # gene (the length vector recycles down the rows).
      per_million <- colSums(countMatrix) / 1e6
      scaled <- sweep(countMatrix, 2, per_million, `/`)

      # Return visibly (the previous version ended on an assignment).
      scaled / length_kb
    }
    31. tmp1 <- tmp/(geneLength$gLength / 1000)
    32. }