文件读取

1、csv打开方式:Excel、记事本、sublime(适用于大文件)、R语言读取
2、分隔符:逗号、空格、制表符(\t)
文件后缀名只有提示作用,不起决定性作用
3、read.csv()
read.table()
4、数据导出,成表格文件
读取—编辑修改—导出,不要覆盖原文件
5、数据保存格式:Rdata
特点:
①其他软件无法打开
②保存的是变量,不是表格文件。
save()保存 load()加载
save(test,file=”example.Rdata “)
文件存储方式

> #文件读写部分> #1.读取ex1.txt> ex1 <- read.table("ex1.txt")> ex1 <- read.table("ex1.txt",header= T)#识别列名>> #2.读取ex2.csv> ex2 <- read.csv("ex2.csv")> ex2 <- read.csv("ex2.csv",row.names = 1,check.names = F)> #R不能识别-,故变成.。所以需要加参数>> #注意:row.names的数据框不允许重复的行名> rod = read.csv("rod.csv",row.names = 1)Error in read.table(file = file, header = header, sep = sep, quote = quote, :'row.names'里不能有重复的名字> rod = read.csv("rod.csv")>> #3.读取soft.txt> soft <- read.table("soft.txt")Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :line 2 did not have 5 elements> soft <- read.table("soft.txt",header = T,fill = T)> #其实不对,sep会将看不见的符号(空格、制表符)等情况识别为分隔符> soft2 <- read.table("soft.txt",header = T,sep = "\t")> #需要再增加参数sep = "\t",才能避免入坑>> #4.soft 的行数列数是多少?列名是什么> dim(soft)[1] 1000 5> colnames(soft)[1] "ID" "SEQUENCE" "GeneName"[4] "GB_ACC" "SPOT_ID">> #5.将soft导出为csv> write.csv(soft,file = "soft.csv")>> #6.将soft保存为Rdata并加载。> save(soft,file = "soft.Rdata")> rm(list = ls()) #soft现在在同个文件夹,所以需要清空> load(file = "soft.Rdata")>


练习
> 练习5-1:错误: unexpected input在"练习5-1:"里> # 1.读取complete_set.txt(已保存在工作目录)> x=read.table('complete_set.txt',header = T)> # 2.查看有多少行、多少列> dim(x)[1] 50 20> # 3.查看列名> colnames(x)[1] "geneA" "geneB" "geneC" "geneD" "geneE" "geneF"[7] "geneG" "geneH" "geneI" "geneJ" "geneK" "geneL"[13] "geneM" "geneN" "geneO" "geneP" "geneQ" "geneR"[19] "geneS" "geneT"> # 4.导出为csv格式> write.csv(x,file = "x.csv")> # 5.保存为Rdata,再加载它> save(x,file="x.Rdata")> rm(list = ls())> load("x.Rdata")> # 6.加载y.Rdata(已保存在工作目录),求gene1列的平均值> load("y.Rdata")> mean(y$gene1)Error in y$gene1 : $ operator is invalid for atomic vectors> class(y)[1] "matrix" "array"> mean(y[,1])[1] NAWarning message:In mean.default(y[, 1]) : 参数不是数值也不是逻辑值:回覆NA> y[,1]GSM1 GSM2 GSM3 GSM4 GSM5 GSM6"40" "20" "51" "46" "38" "49"> mean(as.numeric(y[,1]))[1] 40.66667> #why?> y[,1] = as.numeric(y[,1])> y[,1]GSM1 GSM2 GSM3 GSM4 GSM5 GSM6"40" "20" "51" "46" "38" "49">
