获取更多R语言知识,请关注公众号:医学和生信笔记
医学和生信笔记 公众号主要分享:1.医学小知识、肛肠科小知识;2.R语言和Python相关的数据分析、可视化、机器学习等;3.生物信息学学习资料和自己的学习笔记!
TCGAbiolinks 这个包可以很方便地下载TCGA的各种数据,而且数据是最新的,唯一的障碍是网络问题。这里记录一下这个包的使用方法,以下代码摘录自网络。
# One-time setup (left commented out): install BiocManager if it is missing,
# then install TCGAbiolinks through it. Uncomment to run.
# if (!requireNamespace("BiocManager", quietly=TRUE))
# install.packages("BiocManager")
# BiocManager::install("TCGAbiolinks")
# Attach the packages used by this script. The attach order is kept as-is
# because it determines which package masks which on name clashes.
library(TCGAbiolinks)
library(dplyr)
library(DT)
library(SummarizedExperiment)
# Called with no arguments and the result is not stored; presumably lists the
# projects available on GDC so a project id such as "TCGA-COAD" can be chosen.
getGDCprojects()
# Download clinical data for the TCGA-COAD project and write it to disk twice:
# once as a CSV export and once as an RData file holding the `clinical` object.
clinical <- GDCquery_clinic("TCGA-COAD", type = "clinical")
write.csv(x = clinical, file = "TCGA-COAD-clinical.csv")
save(list = "clinical", file = "TCGA-COAD-clinical.RData")
# Download RNA-seq raw count data for TCGA-COAD.
#
# The workflow used to be "HTSeq - Counts". GDC replaced the HTSeq workflows
# with "STAR - Counts" (Data Release 32), and GDCquery rejects the old value
# against the current GDC, so the STAR workflow is requested here.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "STAR - Counts"
)
# Optional: keep the query object so the same file list can be reused later.
# save(query, file = "query_mrnaCounts.RData")

# Fetch the files through the GDC API in chunks of 50 files per request.
GDCdownload(query, method = "api", files.per.chunk = 50)

# Assemble the downloaded files into a single object.
expdat <- GDCprepare(query = query)

# assay() without an assay name returns the first assay of the object.
# NOTE(review): for "STAR - Counts" the first assay is expected to be the
# unstranded raw counts; confirm with assayNames(expdat) before relying on it.
count_matrix <- assay(expdat)

# Persist the matrix as CSV and as an RData file.
write.csv(count_matrix, file = "TCGA-COAD-Counts.csv")
save(count_matrix, file = "expdat_mrna.RData")
# Download miRNA expression quantification data for TCGA-COAD and store the
# prepared result both as CSV and as an RData file.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Transcriptome Profiling",
  data.type = "miRNA Expression Quantification",
  workflow.type = "BCGSC miRNA Profiling"
)
# Fetch the files through the GDC API, 50 files per chunk.
GDCdownload(query = query, method = "api", files.per.chunk = 50)
expdat_mirna <- GDCprepare(query)
write.csv(x = expdat_mirna, file = "TCGA-COAD-miRNA.csv")
save(list = "expdat_mirna", file = "expdat_mirna.RData")
# Download copy number segment data for TCGA-COAD.
# Note: this reassigns `query` and `expdat`, replacing the objects created by
# the earlier RNA-seq section.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Copy Number Variation",
  data.type = "Copy Number Segment"
)
# Fetch the files through the GDC API, 50 files per chunk.
GDCdownload(query = query, method = "api", files.per.chunk = 50)
expdat <- GDCprepare(query)
# Write the RData file first, then the CSV export (same order as before).
save(list = "expdat", file = "TCGA-COAD-Copy-Number-Variation.RData")
write.csv(x = expdat, file = "TCGA-COAD-Copy-Number-Variation.csv")
# Download the GISTIC2 gene-level copy number scores for TCGA-COAD
# (open-access files only) and save the prepared object as RData.
query <- GDCquery(
  project = "TCGA-COAD",
  data.category = "Copy Number Variation",
  data.type = "Gene Level Copy Number Scores",
  access = "open"
)
# No chunk size is passed here, unlike the other download calls in this script.
GDCdownload(query = query, method = "api")
GISTIC_cnv <- GDCprepare(query = query)
save(list = "GISTIC_cnv", file = "TCGA-COAD-GISTIC-cnv.RData")
# Download DNA methylation data for TCGA-COAD. The download is very large
# (more than 50 GB according to the original author).
# NOTE(review): only data.category is set, so the query is not restricted to
# one platform or data type; GDCprepare may need a narrower query (for example
# a specific `platform` / `data.type`). Confirm against the GDCquery docs.
query.met <- GDCquery(
  project = "TCGA-COAD",
  # legacy = TRUE,
  data.category = "DNA Methylation"
)
# Fetch the files through the GDC API, 300 files per chunk.
GDCdownload(query.met, method = "api", files.per.chunk = 300)

# Prepare the methylation query that was just downloaded. The earlier code
# passed `query` here, which at this point still holds the copy-number query
# from the preceding section, so the wrong data set was prepared and exported.
expdat <- GDCprepare(query = query.met)

# The name `count_matrix` is kept for backward compatibility even though the
# values come from a methylation data set rather than read counts.
count_matrix <- assay(expdat)
write.csv(count_matrix, file = "TCGA-COAD-methylation.csv")
# Download SNV (somatic mutation, MAF) data for COAD called with the "muse"
# pipeline and save it as RData.
# The object is named `acc.maf` although the tumor type is COAD; the name is
# kept because it is also the name stored inside the RData file.
# NOTE(review): GDCquery_Maf appears to have been removed from recent
# TCGAbiolinks releases in favour of GDCquery/GDCdownload/GDCprepare on
# "Simple Nucleotide Variation" data; confirm against the installed version.
acc.maf <- GDCquery_Maf(
  "COAD",
  pipelines = "muse"
)
save(list = "acc.maf", file = "TCGA-COAD-acc.maf.RData")
获取更多R语言知识,请关注公众号:医学和生信笔记
医学和生信笔记 公众号主要分享:1.医学小知识、肛肠科小知识;2.R语言和Python相关的数据分析、可视化、机器学习等;3.生物信息学学习资料和自己的学习笔记!