table() 统计函数
> # table() 统计函数
> library(reshape2) # tips数据集在reshape2包里
> View(tips)
> table(tips$sex, tips$smoker)

         No Yes
  Female 54  33
  Male   97  60
> table(tips$sex, tips$day)

         Fri Sat Sun Thur
  Female   9  28  18   32
  Male    10  59  58   30
> hightip <- tips[,"tip"] > mean(tips[,"tip"]) # 返回逻辑值:小费是否高于平均值
> head(hightip)
[1] FALSE FALSE  TRUE  TRUE  TRUE  TRUE
addmargins() 行和列分别求和
> # addmargins() add in row and column 行和列求和
> View(esoph)
> tt <- table(esoph$agegp,esoph$ncases) # 生成列联表
> addmargins(tt,margin = c(1,2)) # margin:1 is row and 2 is column

         0  1  2  3  4  5  6  8  9 17 Sum
  25-34 14  1  0  0  0  0  0  0  0  0  15
  35-44 10  2  2  1  0  0  0  0  0  0  15
  45-54  3  2  2  2  3  2  2  0  0  0  16
  55-64  0  0  2  4  3  2  2  1  2  0  16
  65-74  1  4  2  2  2  2  1  0  0  1  15
  75+    1  7  3  0  0  0  0  0  0  0  11
  Sum   29 16 11  9  8  6  5  1  2  1  88
xtabs()生成类四格表
> # xtabs()生成类四格表
> as.data.frame(xtabs(~tips$sex+hightip,subset = tips$smoker=="Yes")) # ~左边的连续型数据按照右边的分类数据统计,缺省表示直接统计右边的数据频数
  tips.sex hightip Freq
1   Female   FALSE   17
2     Male   FALSE   26
3   Female    TRUE   16
4     Male    TRUE   34
> as.data.frame(xtabs(~tips$sex+hightip,
+                     subset = tips$day %in% c("Sun","Sat")))
  tips.sex hightip Freq
1   Female   FALSE   21
2     Male   FALSE   53
3   Female    TRUE   25
4     Male    TRUE   64
> xtabs(ncontrols~agegp+alcgp,data=esoph)
       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34        61    45      5    5
  35-44        89    80     20   10
  45-54        78    81     39   15
  55-64        89    84     43   26
  65-74        71    53     29    8
  75+          27    12      2    3
> addmargins(xtabs(ncontrols~agegp+alcgp,data=esoph),margin=c(1,2))
       alcgp
agegp   0-39g/day 40-79 80-119 120+ Sum
  25-34        61    45      5    5 116
  35-44        89    80     20   10 199
  45-54        78    81     39   15 213
  55-64        89    84     43   26 242
  65-74        71    53     29    8 161
  75+          27    12      2    3  44
  Sum         415   355    138   67 975
> xtabs(ncases+ncontrols~agegp+alcgp,data=esoph) # ncases 与 ncontrols 被相加后再统计
       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34        61    45      5    6
  35-44        90    84     20   14
  45-54        79   101     51   28
  55-64       101   106     67   44
  65-74        82    78     42   14
  75+          31    16      4    6
> xtabs(c(ncases,ncontrols)~agegp+alcgp,data=esoph) # 报错
Error in model.frame.default(formula = c(ncases, ncontrols) ~ agegp +  :
  变数的长度不一样('agegp')
> xtabs(cbind(ncases,ncontrols)~agegp+alcgp,data=esoph) # 这个方法对了,分别统计了 ncases 和 ncontrols
, ,  = ncases

       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34         0     0      0    1
  35-44         1     4      0    4
  45-54         1    20     12   13
  55-64        12    22     24   18
  65-74        11    25     13    6
  75+           4     4      2    3

, ,  = ncontrols

       alcgp
agegp   0-39g/day 40-79 80-119 120+
  25-34        61    45      5    5
  35-44        89    80     20   10
  45-54        78    81     39   15
  55-64        89    84     43   26
  65-74        71    53     29    8
  75+          27    12      2    3

> ftable(xtabs(cbind(ncases,ncontrols)~agegp+alcgp,data = esoph)) # ftable,操作xtabs对象,变成扁平表格
                ncases ncontrols
agegp alcgp
25-34 0-39g/day      0        61
      40-79          0        45
      80-119         0         5
      120+           1         5
35-44 0-39g/day      1        89
      40-79          4        80
      80-119         0        20
      120+           4        10
45-54 0-39g/day      1        78
      40-79         20        81
      80-119        12        39
      120+          13        15
55-64 0-39g/day     12        89
      40-79         22        84
      80-119        24        43
      120+          18        26
65-74 0-39g/day     11        71
      40-79         25        53
      80-119        13        29
      120+           6         8
75+   0-39g/day      4        27
      40-79          4        12
      80-119         2         2
      120+           3         3
总结函数summary,describe
> summary(iris)
  Sepal.Length    Sepal.Width     Petal.Length
 Min.   :4.300   Min.   :2.000   Min.   :1.000
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600
 Median :5.800   Median :3.000   Median :4.350
 Mean   :5.843   Mean   :3.057   Mean   :3.758
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100
 Max.   :7.900   Max.   :4.400   Max.   :6.900
  Petal.Width          Species
 Min.   :0.100   setosa    :50
 1st Qu.:0.300   versicolor:50
 Median :1.300   virginica :50
 Mean   :1.199
 3rd Qu.:1.800
 Max.   :2.500
> library(psych)
> psych::describe(iris) # 比summary内容更多
             vars   n mean   sd median trimmed  mad min max
Sepal.Length    1 150 5.84 0.83   5.80    5.81 1.04 4.3 7.9
Sepal.Width     2 150 3.06 0.44   3.00    3.04 0.44 2.0 4.4
Petal.Length    3 150 3.76 1.77   4.35    3.76 1.85 1.0 6.9
Petal.Width     4 150 1.20 0.76   1.30    1.18 1.04 0.1 2.5
Species*        5 150 2.00 0.82   2.00    2.00 1.48 1.0 3.0
             range  skew kurtosis   se
Sepal.Length   3.6  0.31    -0.61 0.07
Sepal.Width    2.4  0.31     0.14 0.04
Petal.Length   5.9 -0.27    -1.42 0.14
Petal.Width    2.4 -0.10    -1.36 0.06
Species*       2.0  0.00    -1.52 0.07
> library(Hmisc)
> describe.data.frame(iris) # 加载Hmisc后,describe() 现在变成了Hmisc的describe;直接调用 describe.data.frame() 会报错
Error in describe.data.frame(iris) :
  没有"describe.data.frame"这个函数
> Hmisc::describe(iris)
iris

 5  Variables      150  Observations
---------------------------------------------------------------
Sepal.Length
       n  missing distinct     Info     Mean      Gmd      .05
     150        0       35    0.998    5.843   0.9462    4.600
     .10      .25      .50      .75      .90      .95
   4.800    5.100    5.800    6.400    6.900    7.255

lowest : 4.3 4.4 4.5 4.6 4.7, highest: 7.3 7.4 7.6 7.7 7.9
---------------------------------------------------------------
Sepal.Width
       n  missing distinct     Info     Mean      Gmd      .05
     150        0       23    0.992    3.057   0.4872    2.345
     .10      .25      .50      .75      .90      .95
   2.500    2.800    3.000    3.300    3.610    3.800

lowest : 2.0 2.2 2.3 2.4 2.5, highest: 3.9 4.0 4.1 4.2 4.4
---------------------------------------------------------------
Petal.Length
       n  missing distinct     Info     Mean      Gmd      .05
     150        0       43    0.998    3.758    1.979     1.30
     .10      .25      .50      .75      .90      .95
    1.40     1.60     4.35     5.10     5.80     6.10

lowest : 1.0 1.1 1.2 1.3 1.4, highest: 6.3 6.4 6.6 6.7 6.9
---------------------------------------------------------------
Petal.Width
       n  missing distinct     Info     Mean      Gmd      .05
     150        0       22     0.99    1.199   0.8676      0.2
     .10      .25      .50      .75      .90      .95
     0.2      0.3      1.3      1.8      2.2      2.3

lowest : 0.1 0.2 0.3 0.4 0.5, highest: 2.1 2.2 2.3 2.4 2.5
---------------------------------------------------------------
Species
       n  missing distinct
     150        0        3

Value          setosa versicolor  virginica
Frequency          50         50         50
Proportion      0.333      0.333      0.333
---------------------------------------------------------------
> describeData(iris)
n.obs = 150 of which 150 are complete cases. Number of variables = 5 of which all are numeric TRUE
             variable # n.obs type     H1     H2     H3     H4
Sepal.Length          1   150    1    5.1    4.9    4.7    4.6
Sepal.Width           2   150    1    3.5    3.0    3.2    3.1
Petal.Length          3   150    1    1.4    1.4    1.3    1.5
Petal.Width           4   150    1    0.2    0.2    0.2    0.2
Species*              5   150    2 setosa setosa setosa setosa
                    T1        T2        T3        T4
Sepal.Length       6.3       6.5       6.2       5.9
Sepal.Width        2.5       3.0       3.4       3.0
Petal.Length       5.0       5.2       5.4       5.1
Petal.Width        1.9       2.0       2.3       1.8
Species*     virginica virginica virginica virginica