table() 统计函数

  1. > # table() 统计函数
  2. > library(reshape2) #tips数据集在reshape2包里
  3. > View(tips)
  4. > table(tips$sex, tips$smoker)
  5. No Yes
  6. Female 54 33
  7. Male 97 60
  8. > table(tips$sex, tips$day)
  9. Fri Sat Sun Thur
  10. Female 9 28 18 32
  11. Male 10 59 58 30
  12. > hightip <- tips[,"tip"] > mean(tips[,"tip"]) #返回逻辑值
  13. > head(hightip)
  14. [1] FALSE FALSE TRUE TRUE TRUE TRUE

addmargins() 行和列分别求和

  1. > # addmargins() add in row and column 行和列求和
  2. > View(esoph)
  3. > tt <- table(esoph$agegp,esoph$ncases) #生成二维列联表(行为agegp,列为ncases)
  4. > addmargins(tt,margin = c(1,2)) # margin: 1 追加合计行(Sum行),2 追加合计列(Sum列)
  5. 0 1 2 3 4 5 6 8 9 17 Sum
  6. 25-34 14 1 0 0 0 0 0 0 0 0 15
  7. 35-44 10 2 2 1 0 0 0 0 0 0 15
  8. 45-54 3 2 2 2 3 2 2 0 0 0 16
  9. 55-64 0 0 2 4 3 2 2 1 2 0 16
  10. 65-74 1 4 2 2 2 2 1 0 0 1 15
  11. 75+ 1 7 3 0 0 0 0 0 0 0 11
  12. Sum 29 16 11 9 8 6 5 1 2 1 88

xtabs()生成类四格表

  1. > # xtabs()生成类四格表
  2. > as.data.frame(xtabs(~tips$sex+hightip,subset = tips$smoker=="Yes"))
  3. # ~左边的数值变量按照右边的分类变量分组求和;左边省略时,直接统计右边各分类组合的频数
  4. tips.sex hightip Freq
  5. 1 Female FALSE 17
  6. 2 Male FALSE 26
  7. 3 Female TRUE 16
  8. 4 Male TRUE 34
  9. > as.data.frame(xtabs(~tips$sex+hightip,
  10. + subset = tips$day %in% c("Sun","Sat")))
  11. tips.sex hightip Freq
  12. 1 Female FALSE 21
  13. 2 Male FALSE 53
  14. 3 Female TRUE 25
  15. 4 Male TRUE 64
  16. > xtabs(ncontrols~agegp+alcgp,data=esoph)
  17. alcgp
  18. agegp 0-39g/day 40-79 80-119 120+
  19. 25-34 61 45 5 5
  20. 35-44 89 80 20 10
  21. 45-54 78 81 39 15
  22. 55-64 89 84 43 26
  23. 65-74 71 53 29 8
  24. 75+ 27 12 2 3
  25. > addmargins(xtabs(ncontrols~agegp+alcgp,data=esoph),margin=c(1,2))
  26. alcgp
  27. agegp 0-39g/day 40-79 80-119 120+ Sum
  28. 25-34 61 45 5 5 116
  29. 35-44 89 80 20 10 199
  30. 45-54 78 81 39 15 213
  31. 55-64 89 84 43 26 242
  32. 65-74 71 53 29 8 161
  33. 75+ 27 12 2 3 44
  34. Sum 415 355 138 67 975
  35. > xtabs(ncases+ncontrols~agegp+alcgp,data=esoph) # ncases和ncontrols先相加,再按分组汇总
  36. alcgp
  37. agegp 0-39g/day 40-79 80-119 120+
  38. 25-34 61 45 5 6
  39. 35-44 90 84 20 14
  40. 45-54 79 101 51 28
  41. 55-64 101 106 67 44
  42. 65-74 82 78 42 14
  43. 75+ 31 16 4 6
  44. > xtabs(c(ncases,ncontrols)~agegp+alcgp,data=esoph) #报错
  45. Error in model.frame.default(formula = c(ncases, ncontrols) ~ agegp + :
  46. 变数的长度不一样('agegp')
  47. > xtabs(cbind(ncases,ncontrols)~agegp+alcgp,data=esoph) # 这个方法对了,分别汇总了ncases和ncontrols
  48. , , = ncases
  49. alcgp
  50. agegp 0-39g/day 40-79 80-119 120+
  51. 25-34 0 0 0 1
  52. 35-44 1 4 0 4
  53. 45-54 1 20 12 13
  54. 55-64 12 22 24 18
  55. 65-74 11 25 13 6
  56. 75+ 4 4 2 3
  57. , , = ncontrols
  58. alcgp
  59. agegp 0-39g/day 40-79 80-119 120+
  60. 25-34 61 45 5 5
  61. 35-44 89 80 20 10
  62. 45-54 78 81 39 15
  63. 55-64 89 84 43 26
  64. 65-74 71 53 29 8
  65. 75+ 27 12 2 3
  66. > ftable(xtabs(cbind(ncases,ncontrols)~agegp+alcgp,data = esoph)) #ftable,操作xtabs对象,变成扁平表格
  67. ncases ncontrols
  68. agegp alcgp
  69. 25-34 0-39g/day 0 61
  70. 40-79 0 45
  71. 80-119 0 5
  72. 120+ 1 5
  73. 35-44 0-39g/day 1 89
  74. 40-79 4 80
  75. 80-119 0 20
  76. 120+ 4 10
  77. 45-54 0-39g/day 1 78
  78. 40-79 20 81
  79. 80-119 12 39
  80. 120+ 13 15
  81. 55-64 0-39g/day 12 89
  82. 40-79 22 84
  83. 80-119 24 43
  84. 120+ 18 26
  85. 65-74 0-39g/day 11 71
  86. 40-79 25 53
  87. 80-119 13 29
  88. 120+ 6 8
  89. 75+ 0-39g/day 4 27
  90. 40-79 4 12
  91. 80-119 2 2
  92. 120+ 3 3

总结函数summary,describe

  1. > summary(iris)
  2. Sepal.Length Sepal.Width Petal.Length
  3. Min. :4.300 Min. :2.000 Min. :1.000
  4. 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600
  5. Median :5.800 Median :3.000 Median :4.350
  6. Mean :5.843 Mean :3.057 Mean :3.758
  7. 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100
  8. Max. :7.900 Max. :4.400 Max. :6.900
  9. Petal.Width Species
  10. Min. :0.100 setosa :50
  11. 1st Qu.:0.300 versicolor:50
  12. Median :1.300 virginica :50
  13. Mean :1.199
  14. 3rd Qu.:1.800
  15. Max. :2.500
  16. > library(psych)
  17. > psych::describe(iris) #比summary内容更多
  18. vars n mean sd median trimmed mad min max
  19. Sepal.Length 1 150 5.84 0.83 5.80 5.81 1.04 4.3 7.9
  20. Sepal.Width 2 150 3.06 0.44 3.00 3.04 0.44 2.0 4.4
  21. Petal.Length 3 150 3.76 1.77 4.35 3.76 1.85 1.0 6.9
  22. Petal.Width 4 150 1.20 0.76 1.30 1.18 1.04 0.1 2.5
  23. Species* 5 150 2.00 0.82 2.00 2.00 1.48 1.0 3.0
  24. range skew kurtosis se
  25. Sepal.Length 3.6 0.31 -0.61 0.07
  26. Sepal.Width 2.4 0.31 0.14 0.04
  27. Petal.Length 5.9 -0.27 -1.42 0.14
  28. Petal.Width 2.4 -0.10 -1.36 0.06
  29. Species* 2.0 0.00 -1.52 0.07
  30. > library(Hmisc)
  31. > describe.data.frame(iris) #加载Hmisc后describe()变成了Hmisc的版本;但describe.data.frame不能直接调用(见下方报错)
  32. Error in describe.data.frame(iris) : 没有"describe.data.frame"这个函数
  33. > Hmisc::describe(iris)
  34. iris
  35. 5 Variables 150 Observations
  36. ---------------------------------------------------------------
  37. Sepal.Length
  38. n missing distinct Info Mean Gmd .05
  39. 150 0 35 0.998 5.843 0.9462 4.600
  40. .10 .25 .50 .75 .90 .95
  41. 4.800 5.100 5.800 6.400 6.900 7.255
  42. lowest : 4.3 4.4 4.5 4.6 4.7, highest: 7.3 7.4 7.6 7.7 7.9
  43. ---------------------------------------------------------------
  44. Sepal.Width
  45. n missing distinct Info Mean Gmd .05
  46. 150 0 23 0.992 3.057 0.4872 2.345
  47. .10 .25 .50 .75 .90 .95
  48. 2.500 2.800 3.000 3.300 3.610 3.800
  49. lowest : 2.0 2.2 2.3 2.4 2.5, highest: 3.9 4.0 4.1 4.2 4.4
  50. ---------------------------------------------------------------
  51. Petal.Length
  52. n missing distinct Info Mean Gmd .05
  53. 150 0 43 0.998 3.758 1.979 1.30
  54. .10 .25 .50 .75 .90 .95
  55. 1.40 1.60 4.35 5.10 5.80 6.10
  56. lowest : 1.0 1.1 1.2 1.3 1.4, highest: 6.3 6.4 6.6 6.7 6.9
  57. ---------------------------------------------------------------
  58. Petal.Width
  59. n missing distinct Info Mean Gmd .05
  60. 150 0 22 0.99 1.199 0.8676 0.2
  61. .10 .25 .50 .75 .90 .95
  62. 0.2 0.3 1.3 1.8 2.2 2.3
  63. lowest : 0.1 0.2 0.3 0.4 0.5, highest: 2.1 2.2 2.3 2.4 2.5
  64. ---------------------------------------------------------------
  65. Species
  66. n missing distinct
  67. 150 0 3
  68. Value setosa versicolor virginica
  69. Frequency 50 50 50
  70. Proportion 0.333 0.333 0.333
  71. ---------------------------------------------------------------
  72. > describeData(iris)
  73. n.obs = 150 of which 150 are complete cases. Number of variables = 5 of which all are numeric TRUE
  74. variable # n.obs type H1 H2 H3 H4
  75. Sepal.Length 1 150 1 5.1 4.9 4.7 4.6
  76. Sepal.Width 2 150 1 3.5 3.0 3.2 3.1
  77. Petal.Length 3 150 1 1.4 1.4 1.3 1.5
  78. Petal.Width 4 150 1 0.2 0.2 0.2 0.2
  79. Species* 5 150 2 setosa setosa setosa setosa
  80. T1 T2 T3 T4
  81. Sepal.Length 6.3 6.5 6.2 5.9
  82. Sepal.Width 2.5 3.0 3.4 3.0
  83. Petal.Length 5.0 5.2 5.4 5.1
  84. Petal.Width 1.9 2.0 2.3 1.8
  85. Species* virginica virginica virginica virginica