table() 统计函数
> # table() 统计函数
> library(reshape2) #tips数据集在reshape2包里
> View(tips)
> table(tips$sex, tips$smoker)
No Yes
Female 54 33
Male 97 60
> table(tips$sex, tips$day)
Fri Sat Sun Thur
Female 9 28 18 32
Male 10 59 58 30
> hightip <- tips[,"tip"] > mean(tips[,"tip"]) #返回逻辑值
> head(hightip)
[1] FALSE FALSE TRUE TRUE TRUE TRUE
addmargins() 行和列分别求和
> # addmargins() 为表格添加行合计与列合计
> View(esoph)
> tt <- table(esoph$agegp,esoph$ncases) #生成列联表(行为年龄组,列为病例数;并非2×2的四格表)
> addmargins(tt,margin = c(1,2)) # margin=1:沿行方向求和,在底部添加Sum行;margin=2:沿列方向求和,在右侧添加Sum列
0 1 2 3 4 5 6 8 9 17 Sum
25-34 14 1 0 0 0 0 0 0 0 0 15
35-44 10 2 2 1 0 0 0 0 0 0 15
45-54 3 2 2 2 3 2 2 0 0 0 16
55-64 0 0 2 4 3 2 2 1 2 0 16
65-74 1 4 2 2 2 2 1 0 0 1 15
75+ 1 7 3 0 0 0 0 0 0 0 11
Sum 29 16 11 9 8 6 5 1 2 1 88
xtabs()生成类四格表
> # xtabs()生成类四格表
> as.data.frame(xtabs(~tips$sex+hightip,subset = tips$smoker=="Yes"))
# 公式中~左边是待汇总(求和)的数值变量,右边是用于分组的分类变量;左边省略时,直接统计右边各分类组合的频数
tips.sex hightip Freq
1 Female FALSE 17
2 Male FALSE 26
3 Female TRUE 16
4 Male TRUE 34
> as.data.frame(xtabs(~tips$sex+hightip,
+ subset = tips$day %in% c("Sun","Sat")))
tips.sex hightip Freq
1 Female FALSE 21
2 Male FALSE 53
3 Female TRUE 25
4 Male TRUE 64
> xtabs(ncontrols~agegp+alcgp,data=esoph)
alcgp
agegp 0-39g/day 40-79 80-119 120+
25-34 61 45 5 5
35-44 89 80 20 10
45-54 78 81 39 15
55-64 89 84 43 26
65-74 71 53 29 8
75+ 27 12 2 3
> addmargins(xtabs(ncontrols~agegp+alcgp,data=esoph),margin=c(1,2))
alcgp
agegp 0-39g/day 40-79 80-119 120+ Sum
25-34 61 45 5 5 116
35-44 89 80 20 10 199
45-54 78 81 39 15 213
55-64 89 84 43 26 242
65-74 71 53 29 8 161
75+ 27 12 2 3 44
Sum 415 355 138 67 975
> xtabs(ncases+ncontrols~agegp+alcgp,data=esoph) # ncases与ncontrols先逐行相加,再按分组汇总(得到的是两者之和)
alcgp
agegp 0-39g/day 40-79 80-119 120+
25-34 61 45 5 6
35-44 90 84 20 14
45-54 79 101 51 28
55-64 101 106 67 44
65-74 82 78 42 14
75+ 31 16 4 6
> xtabs(c(ncases,ncontrols)~agegp+alcgp,data=esoph) #报错:c()把两列拼接成长度176的向量,与agegp的长度(88)不一致
Error in model.frame.default(formula = c(ncases, ncontrols) ~ agegp + :
变数的长度不一样('agegp')
> xtabs(cbind(ncases,ncontrols)~agegp+alcgp,data=esoph) # 这个方法对了:用cbind()可分别汇总ncases和ncontrols
, , = ncases
alcgp
agegp 0-39g/day 40-79 80-119 120+
25-34 0 0 0 1
35-44 1 4 0 4
45-54 1 20 12 13
55-64 12 22 24 18
65-74 11 25 13 6
75+ 4 4 2 3
, , = ncontrols
alcgp
agegp 0-39g/day 40-79 80-119 120+
25-34 61 45 5 5
35-44 89 80 20 10
45-54 78 81 39 15
55-64 89 84 43 26
65-74 71 53 29 8
75+ 27 12 2 3
> ftable(xtabs(cbind(ncases,ncontrols)~agegp+alcgp,data = esoph)) #ftable,操作xtabs对象,变成扁平表格
ncases ncontrols
agegp alcgp
25-34 0-39g/day 0 61
40-79 0 45
80-119 0 5
120+ 1 5
35-44 0-39g/day 1 89
40-79 4 80
80-119 0 20
120+ 4 10
45-54 0-39g/day 1 78
40-79 20 81
80-119 12 39
120+ 13 15
55-64 0-39g/day 12 89
40-79 22 84
80-119 24 43
120+ 18 26
65-74 0-39g/day 11 71
40-79 25 53
80-119 13 29
120+ 6 8
75+ 0-39g/day 4 27
40-79 4 12
80-119 2 2
120+ 3 3
总结函数summary,describe
> summary(iris)
Sepal.Length Sepal.Width Petal.Length
Min. :4.300 Min. :2.000 Min. :1.000
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600
Median :5.800 Median :3.000 Median :4.350
Mean :5.843 Mean :3.057 Mean :3.758
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100
Max. :7.900 Max. :4.400 Max. :6.900
Petal.Width Species
Min. :0.100 setosa :50
1st Qu.:0.300 versicolor:50
Median :1.300 virginica :50
Mean :1.199
3rd Qu.:1.800
Max. :2.500
> library(psych)
> psych::describe(iris) #比summary内容更多
vars n mean sd median trimmed mad min max
Sepal.Length 1 150 5.84 0.83 5.80 5.81 1.04 4.3 7.9
Sepal.Width 2 150 3.06 0.44 3.00 3.04 0.44 2.0 4.4
Petal.Length 3 150 3.76 1.77 4.35 3.76 1.85 1.0 6.9
Petal.Width 4 150 1.20 0.76 1.30 1.18 1.04 0.1 2.5
Species* 5 150 2.00 0.82 2.00 2.00 1.48 1.0 3.0
range skew kurtosis se
Sepal.Length 3.6 0.31 -0.61 0.07
Sepal.Width 2.4 0.31 0.14 0.04
Petal.Length 5.9 -0.27 -1.42 0.14
Petal.Width 2.4 -0.10 -1.36 0.06
Species* 2.0 0.00 -1.52 0.07
> library(Hmisc)
> describe.data.frame(iris) #加载Hmisc后describe()变成了Hmisc的版本;但describe.data.frame不能直接调用,应使用Hmisc::describe()
Error in describe.data.frame(iris) : 没有"describe.data.frame"这个函数
> Hmisc::describe(iris)
iris
5 Variables 150 Observations
---------------------------------------------------------------
Sepal.Length
n missing distinct Info Mean Gmd .05
150 0 35 0.998 5.843 0.9462 4.600
.10 .25 .50 .75 .90 .95
4.800 5.100 5.800 6.400 6.900 7.255
lowest : 4.3 4.4 4.5 4.6 4.7, highest: 7.3 7.4 7.6 7.7 7.9
---------------------------------------------------------------
Sepal.Width
n missing distinct Info Mean Gmd .05
150 0 23 0.992 3.057 0.4872 2.345
.10 .25 .50 .75 .90 .95
2.500 2.800 3.000 3.300 3.610 3.800
lowest : 2.0 2.2 2.3 2.4 2.5, highest: 3.9 4.0 4.1 4.2 4.4
---------------------------------------------------------------
Petal.Length
n missing distinct Info Mean Gmd .05
150 0 43 0.998 3.758 1.979 1.30
.10 .25 .50 .75 .90 .95
1.40 1.60 4.35 5.10 5.80 6.10
lowest : 1.0 1.1 1.2 1.3 1.4, highest: 6.3 6.4 6.6 6.7 6.9
---------------------------------------------------------------
Petal.Width
n missing distinct Info Mean Gmd .05
150 0 22 0.99 1.199 0.8676 0.2
.10 .25 .50 .75 .90 .95
0.2 0.3 1.3 1.8 2.2 2.3
lowest : 0.1 0.2 0.3 0.4 0.5, highest: 2.1 2.2 2.3 2.4 2.5
---------------------------------------------------------------
Species
n missing distinct
150 0 3
Value setosa versicolor virginica
Frequency 50 50 50
Proportion 0.333 0.333 0.333
---------------------------------------------------------------
> describeData(iris)
n.obs = 150 of which 150 are complete cases. Number of variables = 5 of which all are numeric TRUE
variable # n.obs type H1 H2 H3 H4
Sepal.Length 1 150 1 5.1 4.9 4.7 4.6
Sepal.Width 2 150 1 3.5 3.0 3.2 3.1
Petal.Length 3 150 1 1.4 1.4 1.3 1.5
Petal.Width 4 150 1 0.2 0.2 0.2 0.2
Species* 5 150 2 setosa setosa setosa setosa
T1 T2 T3 T4
Sepal.Length 6.3 6.5 6.2 5.9
Sepal.Width 2.5 3.0 3.4 3.0
Petal.Length 5.0 5.2 5.4 5.1
Petal.Width 1.9 2.0 2.3 1.8
Species* virginica virginica virginica virginica