image.png

    1. --第一步,计算每类设备,每一步的uv
    2. --产出结果如下,这是第一张中间表
    3. -- mobile step1 uv1
    4. -- mobile step2 uv2
    5. -- .....
    6. -- mobile step7 uv7
    7. -- desktop step1 uv1
    8. -- desktop step2 uv2
    9. -- .....
    10. -- desktop step7 uv7
    11. create table device_step_uv as
    12. select
    13. device, step, count distinct(session_id)
    14. from
    15. visit_log -- 假设这是原始访问日志表
    16. group by device, step
    17. order by device, step asc;
    18. -- 第二步,把每一种设备的每一步uv concat一下
    19. -- 产出结果如下,这是第二张中间表
    20. -- mobile uv1,uv2,...,uv7
    21. -- desktop uv1,uv2,...,uv7
    22. create table device_step_uv_concat as
    23. select
    24. device,
    25. concat_ws(',', collect_set(step)) as step_uv_concat
    26. from device_step_uv
    27. group by device;
    28. -- 第三步,结果表,
    29. -- 针对第二步的中间表,split 计算各步骤漏斗比例
    30. select
    31. device,
    32. step_1_uv as step1_#,
    33. step_2_uv / step_1_uv as step2_%,
    34. step_3_uv / step_2_uv as step3_%,
    35. step_4_uv / step_3_uv as step4_%,
    36. step_5_uv / step_4_uv as step5_%,
    37. step_6_uv / step_5_uv as step6_%,
    38. step_7_uv / step_6_uv as step7_%
    39. from (
    40. select
    41. device,
    42. cast(split_part(step_uv_concat, 1) as bigint) as step_1_uv, -- 这里暂时假设每一步骤的uv都不为0,实际应该还要考虑为0的情况
    43. cast(split_part(step_uv_concat, 2) as bigint) as step_2_uv,
    44. cast(split_part(step_uv_concat, 3) as bigint) as step_3_uv,
    45. cast(split_part(step_uv_concat, 4) as bigint) as step_4_uv,
    46. cast(split_part(step_uv_concat, 5) as bigint) as step_5_uv,
    47. cast(split_part(step_uv_concat, 6) as bigint) as step_6_uv,
    48. cast(split_part(step_uv_concat, 7) as bigint) as step_7_uv
    49. from
    50. device_step_uv_concat
    51. )t