文件读取

image.png
1、csv打开方式:Excel、记事本、sublime(适用于大文件)、R语言读取
2、分隔符:逗号、空格、制表符(\t)
文件后缀名只有提示作用,不起决定性作用
3、read.csv()
read.table()
4、数据导出:用write.csv()等函数导出为表格文件
image.png
读取—编辑修改—导出,不要覆盖原文件
5、数据保存格式:Rdata
特点:
①其他软件无法打开
②保存的是变量,不是表格文件。
save()保存 load()加载
save(test,file="example.Rdata")

文件存储方式
image.png
image.png

  1. > #文件读写部分
  2. > #1.读取ex1.txt
  3. > ex1 <- read.table("ex1.txt")
  4. > ex1 <- read.table("ex1.txt",header= T)#识别列名
  5. >
  6. > #2.读取ex2.csv
  7. > ex2 <- read.csv("ex2.csv")
  8. > ex2 <- read.csv("ex2.csv",row.names = 1,check.names = F)
  9. > #列名中的"-"不是合法的R变量名字符,默认会被转换成".";加上参数check.names = F可保留原列名
  10. >
  11. > #注意:数据框不允许有重复的行名,所以用row.names = 1指定为行名的那一列不能有重复值
  12. > rod = read.csv("rod.csv",row.names = 1)
  13. Error in read.table(file = file, header = header, sep = sep, quote = quote, :
  14. 'row.names'里不能有重复的名字
  15. > rod = read.csv("rod.csv")
  16. >
  17. > #3.读取soft.txt
  18. > soft <- read.table("soft.txt")
  19. Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
  20. line 2 did not have 5 elements
  21. > soft <- read.table("soft.txt",header = T,fill = T)
  22. > #其实不对:fill = T只是用空值把缺失的位置补齐;read.table默认的sep = ""会把所有空白字符(空格、制表符)都识别为分隔符,遇到空单元格时数据会错位
  23. > soft2 <- read.table("soft.txt",header = T,sep = "\t")
  24. > #需要再增加参数sep = "\t",才能避免入坑
  25. >
  26. > #4.soft 的行数列数是多少?列名是什么
  27. > dim(soft)
  28. [1] 1000 5
  29. > colnames(soft)
  30. [1] "ID" "SEQUENCE" "GeneName"
  31. [4] "GB_ACC" "SPOT_ID"
  32. >
  33. > #5.将soft导出为csv
  34. > write.csv(soft,file = "soft.csv")
  35. >
  36. > #6.将soft保存为Rdata并加载。
  37. > save(soft,file = "soft.Rdata")
  38. > rm(list = ls()) #soft此时还在当前环境中,先清空环境,才能验证load()确实把它重新加载进来
  39. > load(file = "soft.Rdata")
  40. >

image.png
image.png
练习

  1. > 练习5-1
  2. 错误: unexpected input"练习5-1:"
  3. > # 1.读取complete_set.txt(已保存在工作目录)
  4. > x=read.table('complete_set.txt',header = T)
  5. > # 2.查看有多少行、多少列
  6. > dim(x)
  7. [1] 50 20
  8. > # 3.查看列名
  9. > colnames(x)
  10. [1] "geneA" "geneB" "geneC" "geneD" "geneE" "geneF"
  11. [7] "geneG" "geneH" "geneI" "geneJ" "geneK" "geneL"
  12. [13] "geneM" "geneN" "geneO" "geneP" "geneQ" "geneR"
  13. [19] "geneS" "geneT"
  14. > # 4.导出为csv格式
  15. > write.csv(x,file = "x.csv")
  16. > # 5.保存为Rdata,再加载它
  17. > save(x,file="x.Rdata")
  18. > rm(list = ls())
  19. > load("x.Rdata")
  20. > # 6.加载y.Rdata(已保存在工作目录),求gene1列的平均值
  21. > load("y.Rdata")
  22. > mean(y$gene1)
  23. Error in y$gene1 : $ operator is invalid for atomic vectors
  24. > class(y)
  25. [1] "matrix" "array"
  26. > mean(y[,1])
  27. [1] NA
  28. Warning message:
  29. In mean.default(y[, 1]) : 参数不是数值也不是逻辑值:回覆NA
  30. > y[,1]
  31. GSM1 GSM2 GSM3 GSM4 GSM5 GSM6
  32. "40" "20" "51" "46" "38" "49"
  33. > mean(as.numeric(y[,1]))
  34. [1] 40.66667
  35. > #why? y是字符型矩阵,矩阵的所有元素必须是同一种数据类型,所以要先用as.numeric()转换才能求平均值;把数值赋回矩阵的某一列时,也会被自动转换回字符型
  36. > y[,1] = as.numeric(y[,1])
  37. > y[,1]
  38. GSM1 GSM2 GSM3 GSM4 GSM5 GSM6
  39. "40" "20" "51" "46" "38" "49"
  40. >