例如,我们按某个分组变量对数据分组,并计算每组的多个分位数:

  1. > library(dplyr)
  2. > library(tidyr)
  3. > # Z: 1,000 draws of x ~ U(0, 20) with y ~ N(sin(x), 1), grouped by round(x).
  4. > Z <- data.frame(x = runif(n = 1000, min = 0, max = 20)) %>%
  5. +   mutate(y = rnorm(n = n(), mean = sin(x)), x.category = round(x)) %>%
  6. +   group_by(x.category)
  7. > Z
  8. # A tibble: 1,000 x 3
  9. # Groups: x.category [21]
  10. x y x.category
  11. <dbl> <dbl> <dbl>
  12. 1 0.670 0.121 1
  13. 2 16.5 0.0702 16
  14. 3 15.0 -1.47 15
  15. 4 3.16 -0.595 3
  16. 5 12.7 -0.915 13
  17. 6 5.25 -0.540 5
  18. 7 3.82 -0.671 4
  19. 8 10.6 -2.33 11
  20. 9 18.3 1.15 18
  21. 10 1.53 0.205 2
  22. # … with 990 more rows
  23. # Baseline approach: per group, compute the mean of x and the three quartiles
  24. # of y, writing one summary column per quantile, then stack the quantile
  25. # columns into long format (one row per group and statistic).
  26. Z %>%
  27.   summarize(
  28.     x = mean(x),
  29.     y25 = quantile(y, probs = 0.25),
  30.     y50 = quantile(y, probs = 0.5),
  31.     y75 = quantile(y, probs = 0.75)
  32.   ) %>%
  33.   # pivot_longer() replaces the superseded gather(); the same columns are
  34.   # stacked (everything except x and x.category). Row order may differ from
  35.   # gather(): rows are emitted per group rather than per statistic.
  36.   pivot_longer(
  37.     cols = -c(x, x.category),
  38.     names_to = "Statistic",
  39.     values_to = "y"
  40.   )
  41. # The approach above is tedious: every additional quantile needs its own
  42. # line in summarize().
  43. # So how can all the quantile columns be produced in one go?

解法一

  1. # Solution 1: per group, keep the whole quantile vector and the matching
  2. # probabilities as list-columns, then unnest both in parallel so that each
  3. # group yields one row per probability.
  4. probs <- c(0.25, 0.5, 0.75)
  5. Z %>%
  6.   summarize(
  7.     x = mean(x),
  8.     quantile = list(quantile(y, probs = probs)),
  9.     prob = list(probs)
  10.   ) %>%
  11.   unnest(cols = c(quantile, prob))
  12. # A tibble: 63 x 4
  13. # x.category x quantile prob
  14. # <dbl> <dbl> <dbl> <dbl>
  15. # 1 0 0.260 -0.527 0.25
  16. # 2 0 0.260 0.247 0.5
  17. # 3 0 0.260 0.704 0.75
  18. # 4 1 0.997 0.449 0.25
  19. # 5 1 0.997 0.912 0.5
  20. # 6 1 0.997 1.61 0.75
  21. # 7 2 2.00 -0.219 0.25
  22. # 8 2 2.00 0.531 0.5
  23. # 9 2 2.00 1.48 0.75
  24. # 10 3 2.93 -0.498 0.25
  25. # ... with 53 more rows

解法二

  1. # Solution 2: per group, collapse the quantiles into one comma-separated
  2. # string, then split that string into one column per probability
  3. # (y_25, y_50, y_75), converting the pieces back to numbers.
  4. # NOTE(review): the values make a round trip through text, so they carry only
  5. # the precision of paste()'s character conversion -- confirm this is acceptable.
  6. q <- c(0.25, 0.5, 0.75)
  7. Z %>%
  8.   summarise(
  9.     x = mean(x),
  10.     qtls = paste(quantile(y, q), collapse = ",")
  11.   ) %>%
  12.   # convert = TRUE (spelled out: `T` is an ordinary variable that can be
  13.   # reassigned) turns the split character columns back into numeric ones.
  14.   separate(qtls, paste0("y_", 100 * q), sep = ",", convert = TRUE)