例如,我们按某个分组变量对数据分组,并计算每组的多个分位数。
# dplyr supplies the data verbs and the pipe; tidyr supplies the reshaping.
library(dplyr)
library(tidyr)

# Simulated data: x is uniform on [0, 20], y is normal noise centred on
# sin(x), and rows are grouped by x rounded to the nearest integer.
# No seed is set, so the values differ from run to run.
Z <- data.frame(x = runif(1000, min = 0, max = 20)) %>%
  mutate(y = rnorm(n(), mean = sin(x))) %>%
  group_by(x.category = round(x))

Z
# Example output:
# A tibble: 1,000 x 3
# Groups:   x.category [21]
#         x       y x.category
#     <dbl>   <dbl>      <dbl>
#  1  0.670  0.121           1
#  2 16.5    0.0702         16
#  3 15.0   -1.47           15
#  4  3.16  -0.595           3
#  5 12.7   -0.915          13
#  6  5.25  -0.540           5
#  7  3.82  -0.671           4
#  8 10.6   -2.33           11
#  9 18.3    1.15           18
# 10  1.53   0.205           2
# … with 990 more rows

# Naive approach: one summarise() argument per quantile, then reshape the
# y25/y50/y75 columns into long form (one row per group and statistic).
# pivot_longer() supersedes gather(); it yields the same columns, but rows
# are ordered group by group rather than statistic by statistic.
Z %>%
  summarise(
    x = mean(x),
    y25 = quantile(y, probs = 0.25),
    y50 = quantile(y, probs = 0.5),
    y75 = quantile(y, probs = 0.75)
  ) %>%
  pivot_longer(
    cols = -c(x, x.category),
    names_to = "Statistic",
    values_to = "y"
  )

# The approach above is tedious: every additional quantile needs another
# hand-written summarise() argument.
# So how can several columns be generated in one go?
解法一
# Probabilities of the quantiles to compute for every group.
probs <- c(0.25, 0.5, 0.75)

# Store all quantiles of a group in one list-column and the matching
# probabilities in a second list-column, then unnest both in parallel so
# each group expands to one row per probability.
Z %>%
  summarise(
    x = mean(x),
    quantile = list(quantile(y, probs)),
    prob = list(probs)
  ) %>%
  unnest(cols = c("quantile", "prob"))
# Example output:
# A tibble: 63 x 4
#    x.category     x quantile  prob
#         <dbl> <dbl>    <dbl> <dbl>
#  1          0 0.260   -0.527  0.25
#  2          0 0.260    0.247  0.5
#  3          0 0.260    0.704  0.75
#  4          1 0.997    0.449  0.25
#  5          1 0.997    0.912  0.5
#  6          1 0.997    1.61   0.75
#  7          2 2.00    -0.219  0.25
#  8          2 2.00     0.531  0.5
#  9          2 2.00     1.48   0.75
# 10          3 2.93    -0.498  0.25
# ... with 53 more rows
解法二
# Probabilities of the quantiles to compute for every group.
q <- c(0.25, 0.5, 0.75)

# Collapse the quantiles of each group into one comma-separated string,
# then split that string into one column per probability (y_25, y_50,
# y_75). convert = TRUE turns the split pieces back into numbers.
# The values pass through text on the way, so they may be rounded slightly.
Z %>%
  summarise(
    x = mean(x),
    qtls = paste(quantile(y, q), collapse = ",")
  ) %>%
  separate(qtls, paste0("y_", 100 * q), sep = ",", convert = TRUE)
