R统计 - R统计1_描述性统计分析 - 《C020101_R》

table() 统计函数

> # table() 统计函数
> library(reshape2)  #tips数据集在reshape2包里
> View(tips)
> table(tips$sex, tips$smoker)
         No Yes
  Female 54  33
  Male   97  60
> table(tips$sex, tips$day)
         Fri Sat Sun Thur
  Female   9  28  18   32
  Male    10  59  58   30
> hightip <- tips[,"tip"] > mean(tips[,"tip"])  #返回逻辑值
> head(hightip)
[1] FALSE FALSE  TRUE  TRUE  TRUE  TRUE

addmargins() 行和列分别求和

> # addmargins()   add in row and column  行和列求和
> View(esoph)
> tt <- table(esoph$agegp,esoph$ncases)  #生成列联表（行：年龄组，列：ncases取值）
> addmargins(tt,margin = c(1,2))    # margin：1 追加 Sum 行（各列合计），2 追加 Sum 列（各行合计）
         0  1  2  3  4  5  6  8  9 17 Sum
  25-34 14  1  0  0  0  0  0  0  0  0  15
  35-44 10  2  2  1  0  0  0  0  0  0  15
  45-54  3  2  2  2  3  2  2  0  0  0  16
  55-64  0  0  2  4  3  2  2  1  2  0  16
  65-74  1  4  2  2  2  2  1  0  0  1  15
  75+    1  7  3  0  0  0  0  0  0  0  11
  Sum   29 16 11  9  8  6  5  1  2  1  88

xtabs() 用公式生成列联表

> # xtabs() 用公式生成列联表
> as.data.frame(xtabs(~tips$sex+hightip,subset = tips$smoker=="Yes"))  
# ~左边的数值型变量按右边的分类变量分组求和；~左边留空时，直接统计右边各分类组合的频数
  tips.sex hightip Freq
1   Female   FALSE   17
2     Male   FALSE   26
3   Female    TRUE   16
4     Male    TRUE   34
> as.data.frame(xtabs(~tips$sex+hightip,
+                     subset = tips$day %in% c("Sun","Sat")))
  tips.sex hightip Freq
1   Female   FALSE   21
2     Male   FALSE   53
3   Female    TRUE   25
4     Male    TRUE   64
> xtabs(ncontrols~agegp+alcgp,data=esoph)
       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34        61    45      5    5
  35-44        89    80     20   10
  45-54        78    81     39   15
  55-64        89    84     43   26
  65-74        71    53     29    8
  75+          27    12      2    3
> addmargins(xtabs(ncontrols~agegp+alcgp,data=esoph),margin=c(1,2))
       alcgp
agegp   0-39g/day 40-79 80-119 120+ Sum
  25-34        61    45      5    5 116
  35-44        89    80     20   10 199
  45-54        78    81     39   15 213
  55-64        89    84     43   26 242
  65-74        71    53     29    8 161
  75+          27    12      2    3  44
  Sum         415   355    138   67 975
> xtabs(ncases+ncontrols~agegp+alcgp,data=esoph)  # ncases 与 ncontrols 先逐行相加，再分组求和
       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34        61    45      5    6
  35-44        90    84     20   14
  45-54        79   101     51   28
  55-64       101   106     67   44
  65-74        82    78     42   14
  75+          31    16      4    6
> xtabs(c(ncases,ncontrols)~agegp+alcgp,data=esoph) #报错：c()把两列拼接成长度176的向量，与agegp（88行）长度不一致
Error in model.frame.default(formula = c(ncases, ncontrols) ~ agegp +  : 
  变数的长度不一样('agegp')
> xtabs(cbind(ncases,ncontrols)~agegp+alcgp,data=esoph) # 这个方法对了，分别对ncases和ncontrols分组求和
, ,  = ncases
       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34         0     0      0    1
  35-44         1     4      0    4
  45-54         1    20     12   13
  55-64        12    22     24   18
  65-74        11    25     13    6
  75+           4     4      2    3
, ,  = ncontrols
       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34        61    45      5    5
  35-44        89    80     20   10
  45-54        78    81     39   15
  55-64        89    84     43   26
  65-74        71    53     29    8
  75+          27    12      2    3
> ftable(xtabs(cbind(ncases,ncontrols)~agegp+alcgp,data = esoph)) #ftable，操作xtabs对象，变成扁平表格
                 ncases ncontrols
agegp alcgp                      
25-34 0-39g/day       0        61
      40-79           0        45
      80-119          0         5
      120+            1         5
35-44 0-39g/day       1        89
      40-79           4        80
      80-119          0        20
      120+            4        10
45-54 0-39g/day       1        78
      40-79          20        81
      80-119         12        39
      120+           13        15
55-64 0-39g/day      12        89
      40-79          22        84
      80-119         24        43
      120+           18        26
65-74 0-39g/day      11        71
      40-79          25        53
      80-119         13        29
      120+            6         8
75+   0-39g/day       4        27
      40-79           4        12
      80-119          2         2
      120+            3         3

总结函数summary，describe

> summary(iris)
  Sepal.Length    Sepal.Width     Petal.Length  
 Min.   :4.300   Min.   :2.000   Min.   :1.000  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600  
 Median :5.800   Median :3.000   Median :4.350  
 Mean   :5.843   Mean   :3.057   Mean   :3.758  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100  
 Max.   :7.900   Max.   :4.400   Max.   :6.900  
  Petal.Width          Species  
 Min.   :0.100   setosa    :50  
 1st Qu.:0.300   versicolor:50  
 Median :1.300   virginica :50  
 Mean   :1.199                  
 3rd Qu.:1.800                  
 Max.   :2.500     
> library(psych)
> psych::describe(iris)  #比summary内容更多
             vars   n mean   sd median trimmed  mad min max
Sepal.Length    1 150 5.84 0.83   5.80    5.81 1.04 4.3 7.9
Sepal.Width     2 150 3.06 0.44   3.00    3.04 0.44 2.0 4.4
Petal.Length    3 150 3.76 1.77   4.35    3.76 1.85 1.0 6.9
Petal.Width     4 150 1.20 0.76   1.30    1.18 1.04 0.1 2.5
Species*        5 150 2.00 0.82   2.00    2.00 1.48 1.0 3.0
             range  skew kurtosis   se
Sepal.Length   3.6  0.31    -0.61 0.07
Sepal.Width    2.4  0.31     0.14 0.04
Petal.Length   5.9 -0.27    -1.42 0.14
Petal.Width    2.4 -0.10    -1.36 0.06
Species*       2.0  0.00    -1.52 0.07
> library(Hmisc)
> describe.data.frame(iris)  #加载Hmisc后describe()被Hmisc版本遮蔽；但describe.data.frame不能直接调用（见下方报错），应使用Hmisc::describe()
Error in describe.data.frame(iris) : 没有"describe.data.frame"这个函数
> Hmisc::describe(iris)
iris 
 5  Variables      150  Observations
---------------------------------------------------------------
Sepal.Length 
       n  missing distinct     Info     Mean      Gmd      .05 
     150        0       35    0.998    5.843   0.9462    4.600 
     .10      .25      .50      .75      .90      .95 
   4.800    5.100    5.800    6.400    6.900    7.255 
lowest : 4.3 4.4 4.5 4.6 4.7, highest: 7.3 7.4 7.6 7.7 7.9
---------------------------------------------------------------
Sepal.Width 
       n  missing distinct     Info     Mean      Gmd      .05 
     150        0       23    0.992    3.057   0.4872    2.345 
     .10      .25      .50      .75      .90      .95 
   2.500    2.800    3.000    3.300    3.610    3.800 
lowest : 2.0 2.2 2.3 2.4 2.5, highest: 3.9 4.0 4.1 4.2 4.4
---------------------------------------------------------------
Petal.Length 
       n  missing distinct     Info     Mean      Gmd      .05 
     150        0       43    0.998    3.758    1.979     1.30 
     .10      .25      .50      .75      .90      .95 
    1.40     1.60     4.35     5.10     5.80     6.10 
lowest : 1.0 1.1 1.2 1.3 1.4, highest: 6.3 6.4 6.6 6.7 6.9
---------------------------------------------------------------
Petal.Width 
       n  missing distinct     Info     Mean      Gmd      .05 
     150        0       22     0.99    1.199   0.8676      0.2 
     .10      .25      .50      .75      .90      .95 
     0.2      0.3      1.3      1.8      2.2      2.3 
lowest : 0.1 0.2 0.3 0.4 0.5, highest: 2.1 2.2 2.3 2.4 2.5
---------------------------------------------------------------
Species 
       n  missing distinct 
     150        0        3 
Value          setosa versicolor  virginica
Frequency          50         50         50
Proportion      0.333      0.333      0.333
---------------------------------------------------------------
> describeData(iris)
n.obs =  150 of which  150   are complete cases.   Number of variables =  5  of which all are numeric  TRUE  
             variable # n.obs type     H1     H2     H3     H4
Sepal.Length          1   150    1    5.1    4.9    4.7    4.6
Sepal.Width           2   150    1    3.5    3.0    3.2    3.1
Petal.Length          3   150    1    1.4    1.4    1.3    1.5
Petal.Width           4   150    1    0.2    0.2    0.2    0.2
Species*              5   150    2 setosa setosa setosa setosa
                    T1        T2        T3        T4
Sepal.Length       6.3       6.5       6.2       5.9
Sepal.Width        2.5       3.0       3.4       3.0
Petal.Length       5.0       5.2       5.4       5.1
Petal.Width        1.9       2.0       2.3       1.8
Species*     virginica virginica virginica virginica