数据结构基本点

2、向量 - 图1

逻辑型数据

比较运算的结果是逻辑值
>,<,<=,>=,==(相等吗?),!=(不相等吗?)

image.png

逻辑运算
与&、或|、非!
image.png

数据类型的判断和转换

image.png
image.png

数据类型转换优先顺序

字符>数值>逻辑

image.png

实操

2.1向量生成

(1)用 c() 结合到一起【类似陈列】
(2)连续的数字用冒号“:”
(3)有重复的用rep(),有规律的序列用seq(),随机数用rnorm
(4)通过组合,产生更为复杂的向量
image.png
练习
image.png
image.png
如果数据值填错时,可能出现以下情况:
前面<具体数值时,会出现自动补齐;
前面>具体数值时,会出现重复等
image.png

2.2 单个向量

(1)赋值给一个变量名
image.png
image.png
(2)简单数学计算
image.png
(3)根据某条件进行判断,生成逻辑型向量
image.png
(4)初级统计
max(x) #最大值
min(x) #最小值
mean(x) #均值
median(x) #中位数
var(x) #方差
sd(x) #标准差
sum(x) #总和
image.png
‼️重要函数
length(x) #长度
unique(x) #去重复
duplicated(x) #对应元素是否重复
table(x) #重复值统计
sort(x)
sort(x,decreasing = F)
sort(x,decreasing = T)
image.png

2.3.两个向量

(1)比较运算,生成等长的逻辑向量
(2)数学计算
(3)连接
image.png
当两个向量长度不一致
image.png
解答:
x和y不等长时:发生循环补齐【短的向量被循环重复使用,补齐到与长的向量等长】
image.png
可利用循环补齐简化代码
image.png
(4)交集、并集、差集
intersect(x,y)
union(x,y)
setdiff(x,y)
setdiff(y,x)
image.png

2.4.向量筛选(取子集): []

image.png
根据逻辑值(TRUE/FALSE)或下标(位置)取子集
image.png
image.png

image.png
思考题:
image.png
image.png

2.5.修改向量中的某个/某些元素:取子集+赋值

image.png

2.6 简单向量作图

image.png

涉及代码部分

  1. 基本学习操作
  2. > #陈列数字或字符用c()
  3. > c(1,3,5)
  4. [1] 1 3 5
  5. > c("a","b","A")
  6. [1] "a" "b" "A"
  7. > #连续数字用:
  8. > 1:9
  9. [1] 1 2 3 4 5 6 7 8 9
  10. > #重复用rep()
  11. > rep("x",times=6)
  12. [1] "x" "x" "x" "x" "x" "x"
  13. > #有规律的数列用seq()
  14. > seq(from=3,to=11,by=2)
  15. [1] 3 5 7 9 11
  16. > #随机数用rnorm
  17. > rnorm(n=2)
  18. [1] -0.4587199 0.1827148
  19. > #组合用paste0
  20. > paste0(rep("x",times=3),1:2)
  21. [1] "x1" "x2" "x1"
  22. > paste0(rep("student",times=3),seq(from=2,to=10,by=2))
  23. [1] "student2" "student4"
  24. [3] "student6" "student8"
  25. [5] "student10"
  26. > paste0(rep("student"),seq(2,10,2))
  27. [1] "student2" "student4"
  28. [3] "student6" "student8"
  29. [5] "student10"
  30. > a = seq(from=2,to=14,by=2)
  31. > paste0(rep("student",length(a)),a)
  32. [1] "student2" "student4" "student6"
  33. [4] "student8" "student10" "student12"
  34. [7] "student14"
  35. 数据类型转换
  36. > c(2,"a")#数值型自动被转换为字符型数据
  37. [1] "2" "a"
  38. > c(TRUE,"a")#逻辑型自动被转换为字符型数据
  39. [1] "TRUE" "a"
  40. > c(TRUE,FALSE,5)#逻辑型自动被转换为数值型数据
  41. [1] 1 0 5
  42. > #单个向量操作
  43. > #1、赋值,
  44. > #可用名称xyz、AB、_、.
  45. > #不可用空格、“”、c、-、中文
  46. > x <- c(2,4,7) #规范写法
  47. > x = c(2,4,7)#随意写法
  48. > x
  49. [1] 2 4 7
  50. > (x <- c(2,4,7)) #赋值+输出(专用代码)
  51. [1] 2 4 7
  52. > x <- c(2,4,7);x #两条语句写在同一行,用分号隔开
  53. [1] 2 4 7
  54. > #2、简单运算
  55. > x <- c(3,4,8)
  56. > x+1
  57. [1] 4 5 9
  58. > log(x)# 取对数
  59. [1] 1.098612 1.386294 2.079442
  60. > sqrt(x)#开根号
  61. [1] 1.732051 2.000000 2.828427
  62. > #3、根据某条件进行判断,产生逻辑向量
  63. > x>4
  64. [1] FALSE FALSE TRUE
  65. > x==4
  66. [1] FALSE TRUE FALSE
  67. > #4、初级统计
  68. > x <- c(1,5,8,1)
  69. > #长度
  70. > length(x)
  71. [1] 4
  72. > #去重复,从左往右,若第一次出现则不重复,其余重复出现则为重复
  73. > unique(x)
  74. [1] 1 5 8
  75. > #判断是否重复,同unique的定义
  76. > duplicated(x)
  77. [1] FALSE FALSE FALSE TRUE
  78. > !duplicated(x)
  79. [1] TRUE TRUE TRUE FALSE
  80. > #重复值统计
  81. > table(x)
  82. x
  83. 1 5 8
  84. 2 1 1
  85. > #排序
  86. > sort(x)#默认从小到大
  87. [1] 1 1 5 8
  88. > sort(x,decreasing = F)
  89. [1] 1 1 5 8
  90. > sort(x,decreasing = T)
  91. [1] 8 5 1 1
  92. > #对两个向量的操作
  93. > #1、比较运算,生成等长的逻辑向量
  94. > x <- c(1,3,6,1)
  95. > y <- c(3,2,1,6)
  96. > x == y
  97. [1] FALSE FALSE FALSE FALSE
  98. > y == x
  99. [1] FALSE FALSE FALSE FALSE
  100. > #2、数学运算
  101. > x+y
  102. [1] 4 5 7 7
  103. > #3、连接
  104. > paste(x,y,sep=",")
  105. [1] "1,3" "3,2" "6,1" "1,6"
  106. > paste0(x,y)
  107. [1] "13" "32" "61" "16"
  108. > paste(x,y,sep="")
  109. [1] "13" "32" "61" "16"
  110. > paste(x,y)
  111. [1] "1 3" "3 2" "6 1" "1 6"
  112. > # 当两个向量不等长时,发生循环补齐,长的说了算
  113. > x <- c(1,3,6,1)
  114. > y <- c(3,2,6)
  115. > x == y
  116. [1] FALSE FALSE TRUE FALSE
  117. Warning message:
  118. In x == y : 长的对象长度不是短的对象长度的整倍数
  119. > y == x
  120. [1] FALSE FALSE TRUE FALSE
  121. Warning message:
  122. In y == x : 长的对象长度不是短的对象长度的整倍数
  123. > #利用循环补齐简化代码
  124. > paste(rep("x",3),1:3)
  125. [1] "x 1" "x 2" "x 3"
  126. > paste0("x",1:3)
  127. [1] "x1" "x2" "x3"
  128. > #4、交集、并集、差集
  129. > x <- c(1,5,8,1)
  130. > y <- c(3,1,8)
  131. > intersect(x,y)
  132. [1] 1 8
  133. > union(x,y)
  134. [1] 1 5 8 3
  135. > setdiff(x,y)
  136. [1] 5
  137. > x %in% y #x的每个元素在y中存在吗
  138. [1] TRUE FALSE TRUE TRUE
  139. > y %in% x #y的每个元素在x中存在吗
  140. [1] FALSE TRUE TRUE
  141. >
  142. > #注意xy的赋值形式
  143. > x <- c(1,1,3,5,7);y <- c(2,4,5,6,7)
  144. > x <- c(1,1,3,5,7)
  145. > y <- c(2,4,5,6,7)
  146. >
  147. > x>10 #可得到与x等长的逻辑向量
  148. [1] FALSE FALSE FALSE FALSE FALSE
  149. > x %in% y #可得到与x等长的逻辑向量
  150. [1] FALSE FALSE FALSE TRUE TRUE
  151. > duplicated(x) #可得到与x等长的逻辑向量
  152. [1] FALSE TRUE FALSE FALSE FALSE
  153. > unique(x) #不能得到与x等长的逻辑向量
  154. [1] 1 3 5 7
  155. #循环补齐
  156. > x <- c(1,5,7);y <- c(2,4,5,6,7)
  157. > x==y #可发生循环补齐
  158. [1] FALSE FALSE FALSE FALSE FALSE
  159. Warning message:
  160. In x == y : 长的对象长度不是短的对象长度的整倍数
  161. > unique(x) #不发生循环补齐
  162. [1] 1 5 7
  163. > x %in% y #不发生循环补齐
  164. [1] FALSE TRUE TRUE
  165. > paste(x,y) #可发生循环补齐
  166. [1] "1 2" "5 4" "7 5" "1 6" "5 7"
  167. > x+y #可发生循环补齐
  168. [1] 3 9 12 7 12
  169. Warning message:
  170. In x + y : 长的对象长度不是短的对象长度的整倍数
  171. > #向量筛选(取子集)[],即将TRUE对应的值挑选出来
  172. > x <- 8:15
  173. > x==10
  174. [1] FALSE FALSE TRUE FALSE FALSE FALSE FALSE
  175. [8] FALSE
  176. > x[x==10]
  177. [1] 10
  178. > x<12
  179. [1] TRUE TRUE TRUE TRUE FALSE FALSE FALSE
  180. [8] FALSE
  181. > x[x<12]
  182. [1] 8 9 10 11
  183. >
  184. > x %in% c(9,14)
  185. [1] FALSE TRUE FALSE FALSE FALSE FALSE TRUE
  186. [8] FALSE
  187. > x[x%in%c(9,14)]
  188. [1] 9 14
  189. > #[]在没有遇到TRUE或FALSE时,可表示下标/位置(即第几个时对应的值)
  190. > x[2]
  191. [1] 9
  192. > x[2:5]
  193. [1] 9 10 11 12
  194. > x[c(1,5)]
  195. [1] 8 12
  196. > x[-3]# -代表的是反选
  197. [1] 8 9 11 12 13 14 15
  198. > x[-(2:4)]
  199. [1] 8 12 13 14 15
  200. > #简单向量作图
  201. > k1=rnorm(6);k1
  202. [1] -0.4591089 0.1028639 -0.1179319 -0.5134955 0.2737513 1.1585768
  203. > k2=rep(c("a","b"),each=3);k2
  204. [1] "a" "a" "a" "b" "b" "b"
  205. > boxplot(k1~k2)
  206. 作业
  207. > load("gands.Rdata")
  208. > length(g)#计算长度
  209. [1] 100
  210. > length(unique(g))#去重后的个数
  211. [1] 55
  212. > g[g=seq(from=2,to=50,by=2)]#筛选下标为偶数的基因名(此处只取到第50个;g长度为100,取全部偶数下标应写g[seq(from=2,to=length(g),by=2)])
  213. [1] "CRAMP1L" "PRSS8" "CRAMP1L" "SLCO1C1"
  214. [5] "COMMD1" "CCT4" "RAB7A" "ZDHHC16"
  215. [9] "MYL12B" "SNRPE" "ZNF586" "GGT7"
  216. [13] "RAB7A" "AFG3L2" "AC104581.1" "MPP2"
  217. [17] "ATP2A2" "SNRPE" "PRSS8" "ZNF461"
  218. [21] "CECR5" "CLEC17A" "ATG10" "ATG10"
  219. [25] "SLC25A25"
  220. 其他方式
  221. g[c(FALSE,TRUE)] #逻辑值循环补齐,取偶数下标(c(T,F)取到的是奇数下标)
  222. > table(g %in% s)
  223. FALSE TRUE
  224. 37 63
  225. > g[g %in% s] #g中哪些元素在s中存在
  226. [1] "GFM2" "SLCO1C1" "NYNRIN"
  227. [4] "COMMD1" "COMMD1" "AC017081.1"
  228. [7] "RAB7A" "CASKIN2" "GGT7"
  229. [10] "SNRPE" "RGPD3" "ZNF586"
  230. [13] "COMMD1" "GGT7" "URB1"
  231. [16] "RAB7A" "MPP2" "AFG3L2"
  232. [19] "URB1" "AC104581.1" "MPP2"
  233. [22] "SNRPE" "ARHGAP1" "ZNF461"
  234. [25] "OR2D3" "CECR5" "SPDL1"
  235. [28] "CLEC17A" "ZNF461" "ATG10"
  236. [31] "ATG10" "ATG10" "SLC25A25"
  237. [34] "SLC30A9" "SLCO1C1" "GGT7"
  238. [37] "CASKIN2" "GSTP1" "MPP2"
  239. [40] "NYNRIN" "INTS12" "MPP2"
  240. [43] "RGPD3" "RGPD3" "SLC30A9"
  241. [46] "C10orf128" "HBD" "SLC30A9"
  242. [49] "GGT7" "HEPH" "RP5-1021I20.4"
  243. [52] "KLHDC8A" "HBD" "ZNF586"
  244. [55] "CECR5" "OR2D3" "LIPE"
  245. [58] "INTS12" "LIPE" "SPDL1"
  246. [61] "SLCO1C1" "GGT7" "CECR5"
  247. > duplicated(g)#有多少个基因在g中出现不止一次
  248. [1] FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE
  249. [11] TRUE FALSE FALSE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
  250. [21] FALSE FALSE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE
  251. [31] FALSE TRUE FALSE FALSE TRUE TRUE FALSE TRUE FALSE FALSE
  252. [41] FALSE FALSE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE
  253. [51] FALSE FALSE FALSE TRUE FALSE TRUE TRUE FALSE FALSE TRUE
  254. [61] TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE
  255. [71] FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE TRUE FALSE
  256. [81] TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE
  257. [91] FALSE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE
  258. > g[duplicated(g)]
  259. [1] "CRAMP1L" "COMMD1" "CCT4" "MYL12B" "COMMD1" "GGT7"
  260. [7] "RAB7A" "URB1" "MPP2" "IL19" "SNRPE" "PRSS8"
  261. [13] "ZNF461" "ATG10" "ATG10" "ZDHHC16" "SLCO1C1" "GGT7"
  262. [19] "CASKIN2" "UBAC1" "MPP2" "NYNRIN" "MYL12B" "MPP2"
  263. [25] "RGPD3" "RGPD3" "SLC30A9" "SLC30A9" "MYL12B" "GGT7"
  264. [31] "TUBA4A" "HBD" "CCT4" "ZNF586" "CECR5" "OR2D3"
  265. [37] "CRAMP1L" "INTS12" "LIPE" "SPDL1" "SLCO1C1" "MARC2"
  266. [43] "GGT7" "LCP1" "CECR5"
  267. > unique(g[duplicated(g)])
  268. [1] "CRAMP1L" "COMMD1" "CCT4" "MYL12B" "GGT7" "RAB7A"
  269. [7] "URB1" "MPP2" "IL19" "SNRPE" "PRSS8" "ZNF461"
  270. [13] "ATG10" "ZDHHC16" "SLCO1C1" "CASKIN2" "UBAC1" "NYNRIN"
  271. [19] "RGPD3" "SLC30A9" "TUBA4A" "HBD" "ZNF586" "CECR5"
  272. [25] "OR2D3" "INTS12" "LIPE" "SPDL1" "MARC2" "LCP1"
  273. > length(unique(g[duplicated(g)]))
  274. [1] 30
  275. > rnorm(n=10,mean = 0,sd=18)#列出小于-2的数值
  276. [1] 12.180906 -31.719927 -23.929819 1.933203 34.938306
  277. [6] 6.453842 13.887899 11.719862 5.991164 -25.730037
  278. > z <- rnorm(n=10,mean = 0,sd=18)
  279. > z[z < -2]
  280. [1] -19.53764 -12.31483 -7.96186 -15.47074 -35.08725
  281. > z[z <(-2)]
  282. [1] -19.53764 -12.31483 -7.96186 -15.47074 -35.08725