# TPM and RPKM normalization of read-count matrices
# countMatrix is a matrix of read counts from featureCounts or other similar tools
# geneLength is a data frame with first column as gene id and second column as gene length
# TPM ---------------------------------------------------------------------
# 20190505 | update on 20200817
#' Convert a read-count matrix to transcripts per million (TPM)
#'
#' Every count is first divided by its gene length / 1000, then each column is
#' rescaled so that its length-normalized values sum to 1e6.
#'
#' @param countMatrix Matrix of read counts with gene IDs as row names and one
#'   column per sample.
#' @param geneLength Table whose first column is the gene ID and whose second
#'   column is the gene length (presumably in base pairs, since it is divided
#'   by 1000 before use). Row order does not matter; rows are aligned to
#'   `rownames(countMatrix)`.
#' @return An object with the same shape and dimnames as `countMatrix`
#'   holding the TPM values.
TPM <- function(countMatrix, geneLength) {
  gene_ids <- rownames(countMatrix)
  if (is.null(gene_ids)) {
    stop("`countMatrix` must carry gene IDs as row names.", call. = FALSE)
  }

  # Base R only: attaching tidyverse/data.table from inside a function is a
  # global side effect and was needed just to rename two columns.
  geneLength <- as.data.frame(geneLength)
  if (ncol(geneLength) < 2L) {
    stop(
      "`geneLength` needs two columns: gene ID and gene length.",
      call. = FALSE
    )
  }

  # Align the length table to the row order of the count matrix.
  idx <- match(gene_ids, as.character(geneLength[[1L]]))
  if (anyNA(idx)) {
    # A single NA length would make colSums() NA and thereby turn the whole
    # result into NA, so fail loudly instead.
    absent <- gene_ids[is.na(idx)]
    stop(
      length(absent), " gene(s) in `countMatrix` have no entry in ",
      "`geneLength`, e.g. ", paste(head(absent, 5L), collapse = ", "),
      call. = FALSE
    )
  }
  length_kb <- geneLength[[2L]][idx] / 1000

  # The length vector recycles down each column, i.e. one length per row.
  rate <- countMatrix / length_kb
  per_million <- colSums(rate) / 1e6

  # Last expression is not an assignment, so the result is returned visibly.
  sweep(rate, 2, per_million, `/`)
}
# RPKM --------------------------------------------------------------------
# 20190604
#' Convert a read-count matrix to RPKM
#'
#' Every count is divided by its column total / 1e6 (the column sum of
#' `countMatrix`), then by its gene length / 1000.
#'
#' @param countMatrix Matrix of read counts with gene IDs as row names and one
#'   column per sample.
#' @param geneLength Table whose first column is the gene ID and whose second
#'   column is the gene length (presumably in base pairs, since it is divided
#'   by 1000 before use). Row order does not matter; rows are aligned to
#'   `rownames(countMatrix)`.
#' @return An object with the same shape and dimnames as `countMatrix`
#'   holding the RPKM values. Rows whose gene ID is absent from `geneLength`
#'   are `NA` (a warning is raised).
RPKM <- function(countMatrix, geneLength) {
  gene_ids <- rownames(countMatrix)
  if (is.null(gene_ids)) {
    stop("`countMatrix` must carry gene IDs as row names.", call. = FALSE)
  }

  # Base R only: attaching tidyverse/data.table from inside a function is a
  # global side effect and was needed just to rename two columns.
  geneLength <- as.data.frame(geneLength)
  if (ncol(geneLength) < 2L) {
    stop(
      "`geneLength` needs two columns: gene ID and gene length.",
      call. = FALSE
    )
  }

  # Align the length table to the row order of the count matrix.
  idx <- match(gene_ids, as.character(geneLength[[1L]]))
  if (anyNA(idx)) {
    # Unlike TPM, a missing length only affects its own row here, so keep the
    # NA rows but make them visible to the caller.
    absent <- gene_ids[is.na(idx)]
    warning(
      length(absent), " gene(s) in `countMatrix` have no entry in ",
      "`geneLength`; their RPKM is NA, e.g. ",
      paste(head(absent, 5L), collapse = ", "),
      call. = FALSE
    )
  }
  length_kb <- geneLength[[2L]][idx] / 1000

  # Scale each column by its total count per million ...
  per_million <- colSums(countMatrix) / 1e6
  depth_scaled <- sweep(countMatrix, 2, per_million, `/`)

  # ... then each row by its gene length (vector recycles down the columns).
  # Last expression is not an assignment, so the result is returned visibly.
  depth_scaled / length_kb
}