数据汇总时自动生成多列 - 《R语言学习记录》

解法一
解法二

例如，我们想按某个分组变量对数据分组，并计算每一组的多个分位数。

> library(dplyr)
> library(tidyr)
> 
# Build the example data: 1,000 draws of x from a uniform distribution on
# [0, 20], y drawn from a normal distribution whose mean is sin(x), and the
# rows grouped by round(x), which is stored as the new column x.category.
# n() supplies the row count so that rnorm() returns one y per x.
> Z <- data.frame(x = runif(1000, min = 0, max = 20)) %>%
+     mutate(y = rnorm(n(), mean = sin(x))) %>%
+     group_by(x.category = round(x)) 
> Z
# A tibble: 1,000 x 3
# Groups:   x.category [21]
        x       y x.category
    <dbl>   <dbl>      <dbl>
 1  0.670  0.121           1
 2 16.5    0.0702         16
 3 15.0   -1.47           15
 4  3.16  -0.595           3
 5 12.7   -0.915          13
 6  5.25  -0.540           5
 7  3.82  -0.671           4
 8 10.6   -2.33           11
 9 18.3    1.15           18
10  1.53   0.205           2
# … with 990 more rows
# Baseline approach: spell out one summary column per quantile, then reshape
# the result to long format so each row holds one (group, statistic) pair.
Z %>%
  summarize(x = mean(x),
            y25 = quantile(y, probs = .25),
            y50 = quantile(y, probs = .5),
            y75 = quantile(y, probs = .75)) %>%
  # pivot_longer() replaces the superseded gather(). The columns are the same
  # (x.category, x, Statistic, y), but rows are ordered group by group instead
  # of statistic by statistic.
  pivot_longer(
    cols = -c(x, x.category),
    names_to = "Statistic",
    values_to = "y"
  )
# This is the tedious way: every additional quantile needs another
# hand-written line.
# So how can several columns be generated in one go?

解法一

# Probability levels of the quantiles to compute for every group.
probs <- c(0.25, 0.5, 0.75)

# Per group, keep the whole quantile vector and the matching probability
# levels as list-columns, then expand the two list-columns side by side so
# each group yields one row per probability level.
Z %>%
  summarise(
    x = mean(x),
    quantile = list(quantile(y, probs = probs)),
    prob = list(probs)
  ) %>%
  unnest(cols = c(quantile, prob))
# A tibble: 63 x 4
#    x.category     x quantile  prob
#         <dbl> <dbl>    <dbl> <dbl>
#  1          0 0.260   -0.527  0.25
#  2          0 0.260    0.247  0.5 
#  3          0 0.260    0.704  0.75
#  4          1 0.997    0.449  0.25
#  5          1 0.997    0.912  0.5 
#  6          1 0.997    1.61   0.75
#  7          2 2.00    -0.219  0.25
#  8          2 2.00     0.531  0.5 
#  9          2 2.00     1.48   0.75
# 10          3 2.93    -0.498  0.25
# ... with 53 more rows

解法二

# Probability levels of the quantiles; also used to build the column names.
q <- c(0.25, 0.5, 0.75)

# Per group, collapse the quantiles into one comma-separated string, then
# split that string into one column per level (y_25, y_50, y_75).
# convert = TRUE turns the split text back into numbers; TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
# NOTE(review): the values travel through text, so they are limited to the
# precision of their character form (about 15 significant digits) -- confirm
# this is acceptable for downstream use.
Z %>%
  summarise(
    x = mean(x),
    qtls = paste(quantile(y, q), collapse = ",")
  ) %>%
  separate(qtls, paste0("y_", 100 * q), sep = ",", convert = TRUE)