--第一步,计算每类设备,每一步的uv
--产出结果如下,这是第一张中间表
-- mobile step1 uv1
-- mobile step2 uv2
-- .....
-- mobile step7 uv7
-- desktop step1 uv1
-- desktop step2 uv2
-- .....
-- desktop step7 uv7
create table device_step_uv as
select
device, step, count distinct(session_id)
from
visit_log -- 假设这是原始访问日志表
group by device, step
order by device, step asc;
-- 第二步,把每一种设备的每一步uv concat一下
-- 产出结果如下,这是第二张中间表
-- mobile uv1,uv2,...,uv7
-- desktop uv1,uv2,...,uv7
create table device_step_uv_concat as
select
device,
concat_ws(',', collect_set(step)) as step_uv_concat
from device_step_uv
group by device;
-- 第三步,结果表,
-- 针对第二步的中间表,split后 计算各步骤漏斗比例
select
device,
step_1_uv as step1_#,
step_2_uv / step_1_uv as step2_%,
step_3_uv / step_2_uv as step3_%,
step_4_uv / step_3_uv as step4_%,
step_5_uv / step_4_uv as step5_%,
step_6_uv / step_5_uv as step6_%,
step_7_uv / step_6_uv as step7_%
from (
select
device,
cast(split_part(step_uv_concat, 1) as bigint) as step_1_uv, -- 这里暂时假设每一步骤的uv都不为0,实际应该还要考虑为0的情况
cast(split_part(step_uv_concat, 2) as bigint) as step_2_uv,
cast(split_part(step_uv_concat, 3) as bigint) as step_3_uv,
cast(split_part(step_uv_concat, 4) as bigint) as step_4_uv,
cast(split_part(step_uv_concat, 5) as bigint) as step_5_uv,
cast(split_part(step_uv_concat, 6) as bigint) as step_6_uv,
cast(split_part(step_uv_concat, 7) as bigint) as step_7_uv
from
device_step_uv_concat
)t