问题导读:
1、如何设计设备主题?
2、如何设计会员主题(会员主题+漏斗分析)?
3、如何设计商品主题(个数+销量)?
4、如何设计营销主题(用户+商品+购买行为)?
**
一、数仓搭建 - ADS 层
1.1 设备主题
1.1.1 活跃设备数(日、周、月)
需求定义:
日活:当日活跃的设备数
周活:当周活跃的设备数
月活:当月活跃的设备数
1)建表语句
1. drop table if exists ads_uv_count;
2. create external table ads_uv_count(
3. `dt` string COMMENT '统计日期',
4. `day_count` bigint COMMENT '当日用户数量',
5. `wk_count` bigint COMMENT '当周用户数量',
6. `mn_count` bigint COMMENT '当月用户数量',
7. `is_weekend` string COMMENT 'Y,N 是否是周末,用于得到本周最终结果',
8. `is_monthend` string COMMENT 'Y,N 是否是月末,用于得到本月最终结果'
9. ) COMMENT '活跃设备数'
10. row format delimited fields terminated by '\t'
11. location '/warehouse/gmall/ads/ads_uv_count/';
2)导入数据
1. insert into table ads_uv_count
2. select
3. '2020-03-10' dt,
4. daycount.ct,
5. wkcount.ct,
6. mncount.ct,
7. if(date_add(next_day('2020-03-10','MO'),-1)='2020-03-10','Y','N') ,
8. if(last_day('2020-03-10')='2020-03-10','Y','N')
9. from
10. (
11. select
12. '2020-03-10' dt,
13. count(*) ct
14. from dwt_uv_topic
15. where login_date_last='2020-03-10'
16. )daycount join
17. (
18. select
19. '2020-03-10' dt,
20. count (*) ct
21. from dwt_uv_topic
22. where login_date_last>=date_add(next_day('2020-03-10','MO'),-7)
23. and login_date_last<= date_add(next_day('2020-03-10','MO'),-1)
24. ) wkcount on daycount.dt=wkcount.dt
25. join
26. (
27. select
28. '2020-03-10' dt,
29. count (*) ct
30. from dwt_uv_topic
31. where
32. date_format(login_date_last,'yyyy-MM')=date_format('2020-03-10','yyyy-MM')
33. )mncount on daycount.dt=mncount.dt;
34.
复制代码
3)查询导入结果
1. select * from ads_uv_count;
1.1.2 每日新增设备
1)建表语句
1. drop table if exists ads_new_mid_count;
2. create external table ads_new_mid_count
3. (
4. `create_date` string comment '创建时间' ,
5. `new_mid_count` BIGINT comment '新增设备数量'
6. ) COMMENT '每日新增设备信息数量'
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_new_mid_count/';
2)导入数据
1. insert into table ads_new_mid_count
2. select
3. login_date_first,
4. count(*)
5. from dwt_uv_topic
6. where login_date_first='2020-03-10'
7. group by login_date_first;
3)查询导入数据
1. select * from ads_new_mid_count;
2.
复制代码
1.1.3 沉默用户数
需求定义:
沉默用户:只在安装当天启动过,且启动时间是在 7 天前
1)建表语句
1. drop table if exists ads_silent_count;
2. create external table ads_silent_count(
3. `dt` string COMMENT '统计日期',
4. `silent_count` bigint COMMENT '沉默设备数'
5. )
6. row format delimited fields terminated by '\t'
7. location '/warehouse/gmall/ads/ads_silent_count';
8.
复制代码
2)导入 2020-03-20 数据
1. insert into table ads_silent_count
2. select
3. '2020-03-15',
4. count(*)
5. from dwt_uv_topic
6. where login_date_first=login_date_last
7. and login_date_last<=date_add('2020-03-15',-7);
3)查询导入数据
1. select * from ads_silent_count;
1.1.4 本周回流用户数
需求定义:
本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备
1)建表语句
1. drop table if exists ads_back_count;
2. create external table ads_back_count(
3. `dt` string COMMENT '统计日期',
4. `wk_dt` string COMMENT '统计日期所在周',
5. `wastage_count` bigint COMMENT '回流设备数'
6. )
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_back_count';
2)导入数据:
1. insert into table ads_back_count
2. select
3. '2020-03-15',
4. count(*)
5. from
6. (
7. select
8. mid_id
9. from dwt_uv_topic
10. where login_date_last>=date_add(next_day('2020-03-15','MO'),-7)
11. and login_date_last<= date_add(next_day('2020-03-15','MO'),-1)
12. and login_date_first<date_add(next_day('2020-03-15','MO'),-7)
13. )current_wk
14. left join
15. (
16. select
17. mid_id
18. from dws_uv_detail_daycount
19. where dt>=date_add(next_day('2020-03-15','MO'),-7*2)
20. and dt<= date_add(next_day('2020-03-15','MO'),-7-1)
21. group by mid_id
22. )last_wk
23. on current_wk.mid_id=last_wk.mid_id
24. where last_wk.mid_id is null;
25.
复制代码
3)查询结果
1. select * from ads_back_count;
1.1.5 流失用户数
需求定义:
流失用户:最近 7 天未活跃的设备
1)建表语句
1. drop table if exists ads_wastage_count;
2. create external table ads_wastage_count(
3. `dt` string COMMENT '统计日期',
4. `wastage_count` bigint COMMENT '流失设备数'
5. )
6. row format delimited fields terminated by '\t'
7. location '/warehouse/gmall/ads/ads_wastage_count';
2)导入 2020-03-20 数据
1. insert into table ads_wastage_count
2. select
3. '2020-03-20',
4. count(*)
5. from
6. (
7. select
8. mid_id
9. from dwt_uv_topic
10. where login_date_last<=date_add('2020-03-20',-7)
11. group by mid_id
12. )t1;
3)查询结果
1. select * from ads_wastage_count;
1.1.6 留存率
1)建表语句
1. drop table if exists ads_user_retention_day_rate;
2. create external table ads_user_retention_day_rate
3. (
4. `stat_date` string comment '统计日期',
5. `create_date` string comment '设备新增日期',
6. `retention_day` int comment '截止当前日期留存天数',
7. `retention_count` bigint comment '留存数量',
8. `new_mid_count` bigint comment '设备新增数量',
9. `retention_ratio` decimal(10,2) comment '留存率'
10. ) COMMENT '每日用户留存情况'
11. row format delimited fields terminated by '\t'
12. location '/warehouse/gmall/ads/ads_user_retention_day_rate/';
2)导入数据
1. insert into table ads_user_retention_day_rate
2. select
3. '2020-03-10',--统计日期
4. date_add('2020-03-10',-1),--新增日期
5. 1,--留存天数
6. sum(if(login_date_first=date_add('2020-03-10',-1) and
7. login_date_last='2020-03-10',1,0)),--2020-03-09 的 1 日留存数
8. sum(if(login_date_first=date_add('2020-03-10',-1),1,0)),--2020-03-09 新增
9. sum(if(login_date_first=date_add('2020-03-10',-1) and
10. login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
11. 1),1,0))*100
12. from dwt_uv_topic
13. union all
14. select
15. '2020-03-10',--统计日期
16. date_add('2020-03-10',-2),--新增日期
17. 2,--留存天数
18. sum(if(login_date_first=date_add('2020-03-10',-2) and
19. login_date_last='2020-03-10',1,0)),--2020-03-08 的 2 日留存数
20. sum(if(login_date_first=date_add('2020-03-10',-2),1,0)),--2020-03-08 新增
21. sum(if(login_date_first=date_add('2020-03-10',-2) and
22. login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
23. 2),1,0))*100
24. from dwt_uv_topic
25. union all
26. select
27. '2020-03-10',--统计日期
28. date_add('2020-03-10',-3),--新增日期
29. 3,--留存天数
30. sum(if(login_date_first=date_add('2020-03-10',-3) and
31. login_date_last='2020-03-10',1,0)),--2020-03-07 的 3 日留存数
32. sum(if(login_date_first=date_add('2020-03-10',-3),1,0)),--2020-03-07 新增
33. sum(if(login_date_first=date_add('2020-03-10',-3) and
34. login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
35. 3),1,0))*100
36. from dwt_uv_topic;
3)查询导入数据
1. select * from ads_user_retention_day_rate;
2.
复制代码
1.1.7 最近连续三周活跃用户数
1)建表语句
1. drop table if exists ads_continuity_wk_count;
2. create external table ads_continuity_wk_count(
3. `dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日
4. 期',
5. `wk_dt` string COMMENT '持续时间',
6. `continuity_count` bigint COMMENT '活跃次数'
7. )
8. row format delimited fields terminated by '\t'
9. location '/warehouse/gmall/ads/ads_continuity_wk_count';
2)导入 2020-03-20 所在周的数据
1. insert into table ads_continuity_wk_count
2. select
3. '2020-03-15',
4. concat(date_add(next_day('2020-03-15','MO'),-7*3),'_',date_add(next_day('
5. 2020-03-15','MO'),-1)),
6. count(*)
7. from
8. (
9. select
10. mid_id
11. from
12. (
13. select
14. mid_id
15. from dws_uv_detail_daycount
16. where dt>=date_add(next_day('2020-03-10','monday'),-7)
17. and dt<=date_add(next_day('2020-03-10','monday'),-1)
18. group by mid_id
19. union all
20. select
21. mid_id
22. from dws_uv_detail_daycount
23. where dt>=date_add(next_day('2020-03-10','monday'),-7*2)
24. and dt<=date_add(next_day('2020-03-10','monday'),-7-1)
25. group by mid_id
26. union all
27. select
28. mid_id
29. from dws_uv_detail_daycount
30. where dt>=date_add(next_day('2020-03-10','monday'),-7*3)
31. and dt<=date_add(next_day('2020-03-10','monday'),-7*2-1)
32. group by mid_id
33. )t1
34. group by mid_id
35. having count(*)=3
36. )t2
3)查询
1. select * from ads_continuity_wk_count;
1.1.8 最近七天内连续三天活跃用户数
1)建表语句
1. drop table if exists ads_continuity_uv_count;
2. create external table ads_continuity_uv_count(
3. `dt` string COMMENT '统计日期',
4. `wk_dt` string COMMENT '最近 7 天日期',
5. `continuity_count` bigint
6. ) COMMENT '连续活跃设备数'
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_continuity_uv_count';
2)写出导入数据的 SQL 语句
1. insert into table ads_continuity_uv_count
2. select
3. '2020-03-12',
4. concat(date_add('2020-03-12',-6),'_','2020-03-12'),
5. count(*)
6. from
7. (
8. select mid_id
9. from
10. (
11. select mid_id
12. from
13. (
14. select
15. mid_id,
16. date_sub(dt,rank) date_dif
17. from
18. (
19. select
20. mid_id,
21. dt,
22. rank() over(partition by mid_id order by dt) rank
23. from dws_uv_detail_daycount
24. where dt>=date_add('2020-03-12',-6) and
25. dt<='2020-03-12'
26. )t1
27. )t2
28. group by mid_id,date_dif
29. having count(*)>=3
30. )t3
31. group by mid_id
32. )t4;
3)查询
1. select * from ads_continuity_uv_count;
1.2 会员主题
1.2.1 会员主题信息
1)建表
1. drop table if exists ads_user_topic;
2. create external table ads_user_topic(
3. `dt` string COMMENT '统计日期',
4. `day_users` string COMMENT '活跃会员数',
5. `day_new_users` string COMMENT '新增会员数',
6. `day_new_payment_users` string COMMENT '新增消费会员数',
7. `payment_users` string COMMENT '总付费会员数',
8. `users` string COMMENT '总会员数',
9. `day_users2users` decimal(10,2) COMMENT '会员活跃率',
10. `payment_users2users` decimal(10,2) COMMENT '会员付费率',
11. `day_new_users2users` decimal(10,2) COMMENT '会员新鲜度'
12. ) COMMENT '会员主题信息表'
13. row format delimited fields terminated by '\t'
14. location '/warehouse/gmall/ads/ads_user_topic';
15.
复制代码
2)导入数据
1. insert into table ads_user_topic
2. select
3. '2020-03-10',
4. sum(if(login_date_last='2020-03-10',1,0)),
5. sum(if(login_date_first='2020-03-10',1,0)),
6. sum(if(payment_date_first='2020-03-10',1,0)),
7. sum(if(payment_count>0,1,0)),
8. count(*),
9. sum(if(login_date_last='2020-03-10',1,0))/count(*),
10. sum(if(payment_count>0,1,0))/count(*),
11. sum(if(login_date_first='2020-03-10',1,0))/sum(if(login_date_last='2020-03-10',1,0))
12. from dwt_user_topic
3)查询数据
1. hive (gmall)> select * from ads_user_topic;
4)vim ads_user_topic.sh
添加如下内容:
1. #!/bin/bash
2. APP=gmall
3. hive=/opt/modules/hive/bin/hive
4. # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
5. if [ -n "$1" ] ;then
6. do_date=$1
7. else
8. do_date=`date -d "-1 day" +%F`
9. fi
10. sql="
11. with
12. tmp_day_users as
13. (
14. select
15. '$do_date' dt,
16. count(*) day_users
17. from
18. ${APP}.dwt_user_topic
19. where
20. login_date_last='$do_date'
21. ),
22. tmp_day_new_users as
23. (
24. select
25. '$do_date' dt,
26. count(*) day_new_users
27. from
28. ${APP}.dwt_user_topic
29. where
30. login_date_last='$do_date' and login_date_first='$do_date'
31. ),
32. tmp_day_new_payment_users as
33. (
34. select
35. '$do_date' dt,
36. count(*) day_new_payment_users
37. from
38. ${APP}.dwt_user_topic
39. where
40. payment_date_first='$do_date'
41. ),
42. tmp_payment_users as
43. (
44. select
45. '$do_date' dt,
46. count(*) payment_users
47. from
48. ${APP}.dwt_user_topic
49. where
50. payment_date_first is not null
51. ),
52. tmp_users as
53. (
54. select
55. '$do_date' dt,
56. count(*) users
57. from
58. ${APP}.dwt_user_topic
59. tmp_users
60. )
61. insert into table ${APP}.ads_user_topic
62. select
63. '$do_date' dt,
64. day_users,
65. day_new_users,
66. day_new_payment_users,
67. payment_users,
68. users,
69. day_users/users,
70. payment_users/users,
71. day_new_users/users
72. from
73. tmp_day_users
74. join
75. tmp_day_new_users
76. on
77. tmp_day_users.dt=tmp_day_new_users.dt
78. join
79. tmp_day_new_payment_users
80. on
81. tmp_day_users.dt=tmp_day_new_payment_users.dt
82. join
83. tmp_payment_users
84. on
85. tmp_day_users.dt=tmp_payment_users.dt
86. join
87. tmp_users
88. on
89. tmp_day_users.dt=tmp_users.dt;
90. "
91. $hive -e "$sql"
92.
复制代码
5)增加脚本执行权限
1. chmod 770 ads_user_topic.sh
6)执行脚本导入数据
1. ads_user_topic.sh 2020-03-11
7)查看导入数据
1. select * from ads_user_topic;
1.2.2 漏斗分析
统计“浏览->购物车->下单->支付”的转化率
思路:统计各个行为的人数,然后计算比值
1)建表语句
1. drop table if exists ads_user_action_convert_day;
2. create external table ads_user_action_convert_day(
3. `dt` string COMMENT '统计日期',
4. `total_visitor_m_count` bigint COMMENT '总访问人数',
5. `cart_u_count` bigint COMMENT '加入购物车的人数',
6. `visitor2cart_convert_ratio` decimal(10,2) COMMENT '访问到加入购物车转化率',
7. `order_u_count` bigint COMMENT '下单人数',
8. `cart2order_convert_ratio` decimal(10,2) COMMENT '加入购物车到下单转化率',
9. `payment_u_count` bigint COMMENT '支付人数',
10. `order2payment_convert_ratio` decimal(10,2) COMMENT '下单到支付的转化率'
11. ) COMMENT '用户行为漏斗分析'
12. row format delimited fields terminated by '\t'
13. location '/warehouse/gmall/ads/ads_user_action_convert_day/';
2)数据装载
1. insert into table ads_user_action_convert_day
2. select
3. '2020-03-10',
4. uv.day_count,
5. ua.cart_count,
6. cast(ua.cart_count/uv.day_count as decimal(10,2)) visitor2cart_convert_ratio,
7. ua.order_count,
8. cast(ua.order_count/ua.cart_count as decimal(10,2)) visitor2order_convert_ratio,
9. ua.payment_count,
10. cast(ua.payment_count/ua.order_count as decimal(10,2)) order2payment_convert_ratio
11. from
12. (
13. select
14. dt,
15. sum(if(cart_count>0,1,0)) cart_count,
16. sum(if(order_count>0,1,0)) order_count,
17. sum(if(payment_count>0,1,0)) payment_count
18. from dws_user_action_daycount
19. where dt='2020-03-10'
20. group by dt
21. )ua join ads_uv_count uv on uv.dt=ua.dt;
3)查询加载数据
1. select * from ads_user_action_convert_day;
2.
复制代码
1.3 商品主题
1.3.1 商品个数信息
1)建表语句
1. drop table if exists ads_product_info;
2. create external table ads_product_info(
3. `dt` string COMMENT '统计日期',
4. `sku_num` string COMMENT 'sku 个数',
5. `spu_num` string COMMENT 'spu 个数'
6. ) COMMENT '商品个数信息'
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_product_info';
2)导入数据
1. insert into table ads_product_info
2. select
3. '2020-03-10' dt,
4. sku_num,
5. spu_num
6. from
7. (
8. select
9. '2020-03-10' dt,
10. count(*) sku_num
11. from
12. dwt_sku_topic
13. ) tmp_sku_num
14. join
15. (
16. select
17. '2020-03-10' dt,
18. count(*) spu_num
19. from
20. (
21. select
22. spu_id
23. from
24. dwt_sku_topic
25. group by
26. spu_id
27. ) tmp_spu_id
28. ) tmp_spu_num
29. on
30. tmp_sku_num.dt=tmp_spu_num.dt;
3)查询结果数据
1. select * from ads_product_info;
1.3.2 商品销量排名
1)建表语句
1. drop table if exists ads_product_sale_topN;
2. create external table ads_product_sale_topN(
3. `dt` string COMMENT '统计日期',
4. `sku_id` string COMMENT '商品 ID',
5. `payment_amount` bigint COMMENT '销量'
6. ) COMMENT '商品个数信息'
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_product_sale_topN';
2)导入数据
1. insert into table ads_product_sale_topN
2. select
3. '2020-03-10' dt,
4. sku_id,
5. payment_amount
6. from
7. dws_sku_action_daycount
8. where
9. dt='2020-03-10'
10. order by payment_amount desc
11. limit 10;
3)查询结果数据
1. select * from ads_product_sale_topN;
2.
复制代码
1.3.3 商品收藏排名
1)建表语句
1. drop table if exists ads_product_favor_topN;
2. create external table ads_product_favor_topN(
3. `dt` string COMMENT '统计日期',
4. `sku_id` string COMMENT '商品 ID',
5. `favor_count` bigint COMMENT '收藏量'
6. ) COMMENT '商品收藏 TopN'
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_product_favor_topN';
2)导入数据
1. insert into table ads_product_favor_topN
2. select
3. '2020-03-10' dt,
4. sku_id,
5. favor_count
6. from
7. dws_sku_action_daycount
8. where
9. dt='2020-03-10'
10. order by favor_count desc
11. limit 10;
3)查询数据
1. select * from ads_product_favor_topN;
1.3.4 商品加入购物车排名
1)建表语句
1. drop table if exists ads_product_cart_topN;
2. create external table ads_product_cart_topN(
3. `dt` string COMMENT '统计日期',
4. `sku_id` string COMMENT '商品 ID',
5. `cart_num` bigint COMMENT '加入购物车数量'
6. ) COMMENT '商品加入购物车 TopN'
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_product_cart_topN';
2)导入数据
1. insert into table ads_product_cart_topN
2. select
3. '2020-03-10' dt,
4. sku_id,
5. cart_num
6. from
7. dws_sku_action_daycount
8. where
9. dt='2020-03-10'
10. order by cart_num desc
11. limit 10;
3)查询数据
1. select * from ads_product_cart_topN;
1.3.5 商品退款率排名(最近 30 天)
1)建表语句
1. drop table if exists ads_product_refund_topN;
2. create external table ads_product_refund_topN(
3. `dt` string COMMENT '统计日期',
4. `sku_id` string COMMENT '商品 ID',
5. `refund_ratio` decimal(10,2) COMMENT '退款率'
6. ) COMMENT '商品退款率 TopN'
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_product_refund_topN';
2)导入数据
1. insert into table ads_product_refund_topN
2. select
3. '2020-03-10',
4. sku_id,
5. refund_last_30d_count/payment_last_30d_count*100 refund_ratio
6. from dwt_sku_topic
7. order by refund_ratio desc
8. limit 10;
3)查询数据
1. select * from ads_product_refund_topN;
1.3.6 商品差评率
1)建表语句
1. drop table if exists ads_appraise_bad_topN;
2. create external table ads_appraise_bad_topN(
3. `dt` string COMMENT '统计日期',
4. `sku_id` string COMMENT '商品 ID',
5. `appraise_bad_ratio` decimal(10,2) COMMENT '差评率'
6. ) COMMENT '商品差评率 TopN'
7. row format delimited fields terminated by '\t'
8. location '/warehouse/gmall/ads/ads_appraise_bad_topN';
2)导入数据
1. insert into table ads_appraise_bad_topN
2. select
3. '2020-03-10' dt,
4. sku_id,
5. appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_coun
6. t+appraise_default_count) appraise_bad_ratio
7. from
8. dws_sku_action_daycount
9. where
10. dt='2020-03-10'
11. order by appraise_bad_ratio desc
12. limit 10;
3)查询数据
1. select * from ads_appraise_bad_topN;
2.
复制代码
1.4 营销主题(用户+商品+购买行为)
1.4.1 下单数目统计
需求分析:统计每日下单数,下单金额及下单用户数
1)建表语句
1. drop table if exists ads_order_daycount;
2. create external table ads_order_daycount(
3. dt string comment '统计日期',
4. order_count bigint comment '单日下单笔数',
5. order_amount decimal(10,2) comment '单日下单金额',
6. order_users bigint comment '单日下单用户数'
7. ) comment '每日订单总计表'
8. row format delimited fields terminated by '\t'
9. location '/warehouse/gmall/ads/ads_order_daycount';
2)导入数据
1. insert into table ads_order_daycount
2. select
3. '2020-03-10',
4. sum(order_count),
5. sum(order_amount),
6. sum(if(order_count>0,1,0))
7. from dws_user_action_daycount
8. where dt='2020-03-10';
3)查询数据
1. select * from ads_order_daycount;
2.
复制代码
1.4.2 支付信息统计
每日支付金额、支付人数、支付商品数、支付笔数以及下单到支付的平均时长(取自 DWD)
1)建表
1. drop table if exists ads_payment_daycount;
2. create external table ads_payment_daycount(
3. dt string comment '统计日期',
4. payment_count bigint comment '单日支付笔数',
5. payment_amount decimal(10,2) comment '单日支付金额',
6. payment_user_count bigint comment '单日支付人数',
7. payment_sku_count bigint comment '单日支付商品数',
8. payment_avg_time double comment '下单到支付的平均时长,取分钟数'
9. ) comment '每日订单总计表'
10. row format delimited fields terminated by '\t'
11. location '/warehouse/gmall/ads/ads_payment_daycount';
2)导入数据
1. insert into table ads_payment_daycount
2. select
3. tmp_payment.dt,
4. tmp_payment.payment_count,
5. tmp_payment.payment_amount,
6. tmp_payment.payment_user_count,
7. tmp_skucount.payment_sku_count,
8. tmp_time.payment_avg_time
9. from
10. (
11. select
12. '2020-03-15' dt,
13. sum(payment_count) payment_count,
14. sum(payment_amount) payment_amount,
15. sum(if(payment_count>0,1,0)) payment_user_count
16. from dws_user_action_daycount
17. where dt='2020-03-15'
18. )tmp_payment
19. join
20. (
21. select
22. '2020-03-15' dt,
23. sum(if(payment_count>0,1,0)) payment_sku_count
24. from dws_sku_action_daycount
25. where dt='2020-03-15'
26. )tmp_skucount on tmp_payment.dt=tmp_skucount.dt
27. join
28. (
29. select
30. '2020-03-15' dt,
31. sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60
32. payment_avg_time
33. from dwd_fact_order_info
34. where dt='2020-03-15'
35. and payment_time is not null
36. )tmp_time on tmp_payment.dt=tmp_time.dt
3)查询数据
1. select * from ads_payment_daycount;
1.4.3 复购率
1)建表语句
1. drop table ads_sale_tm_category1_stat_mn;
2. create external table ads_sale_tm_category1_stat_mn
3. (
4. tm_id string comment '品牌 id',
5. category1_id string comment '1 级品类 id ',
6. category1_name string comment '1 级品类名称 ',
7. buycount bigint comment '购买人数',
8. buy_twice_last bigint comment '两次以上购买人数',
9. buy_twice_last_ratio decimal(10,2) comment '单次复购率',
10. buy_3times_last bigint comment '三次以上购买人数',
11. buy_3times_last_ratio decimal(10,2) comment '多次复购率',
12. stat_mn string comment '统计月份',
13. stat_date string comment '统计日期'
14. ) COMMENT '复购率统计'
15. row format delimited fields terminated by '\t'
16. location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';
2)数据导入
1. insert into table ads_sale_tm_category1_stat_mn
2. select
3. mn.sku_tm_id,
4. mn.sku_category1_id,
5. mn.sku_category1_name,
6. sum(if(mn.order_count>=1,1,0)) buycount,
7. sum(if(mn.order_count>=2,1,0)) buyTwiceLast,
8. sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0))
9. buyTwiceLastRatio,
10. sum(if(mn.order_count>=3,1,0)) buy3timeLast ,
11. sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0))
12. buy3timeLastRatio ,
13. date_format('2020-03-10' ,'yyyy-MM') stat_mn,
14. '2020-03-10' stat_date
15. from
16. (
17. select
18. user_id,
19. sd.sku_tm_id,
20. sd.sku_category1_id,
21. sd.sku_category1_name,
22. sum(order_count) order_count
23. from dws_sale_detail_daycount sd
24. where date_format(dt,'yyyy-MM')=date_format('2020-03-10' ,'yyyy-MM')
25. group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name
26. ) mn
27. group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
1.5 ADS 层导入脚本
1)vim dwt_to_ads.sh
在脚本中填写如下内容
1. #!/bin/bash
2. hive=/opt/modules/hive/bin/hive
3. # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
4. if [ -n "$1" ] ;then
5. do_date=$1
6. else
7. do_date=`date -d "-1 day" +%F`
8. fi
9. sql="use gmall;
10. insert into table ads_uv_count
11. select
12. '$do_date',
13. sum(if(login_date_last='$do_date',1,0)),
14. sum(if(login_date_last>=date_add(next_day('$do_date','monday'),-7) and
15. login_date_last<=date_add(next_day('$do_date','monday'),-1) ,1,0)),
16. sum(if(date_format(login_date_last,'yyyy-MM')=date_format('$do_date','yyyy-M
17. M'),1,0)),
18. if('$do_date'=date_add(next_day('$do_date','monday'),-1),'Y','N'),
19. if('$do_date'=last_day('$do_date'),'Y','N')
20. from dwt_uv_topic;
21.
22.
23. insert into table ads_new_mid_count
24. select
25. '$do_date',
26. count(*)
27. from dwt_uv_topic
28. where login_date_first='$do_date';
29.
30.
31. insert into table ads_silent_count
32. select
33. '$do_date',
34. count(*)
35. from dwt_uv_topic
36. where login_date_first=login_date_last
37. and login_date_last<=date_add('$do_date',-7);
38.
39.
40. insert into table ads_back_count
41. select
42. '$do_date',
43. concat(date_add(next_day('2020-03-10','MO'),-7),'_',date_add(next_day('2020-
44. 03-10','MO'),-1)),
45. count(*)
46. from
47. (
48. select
49. mid_id
50. from dwt_uv_topic
51. where login_date_last>=date_add(next_day('$do_date','MO'),-7)
52. and login_date_last<= date_add(next_day('$do_date','MO'),-1)
53. and login_date_first<date_add(next_day('$do_date','MO'),-7)
54. )current_wk
55. left join
56. (
57. select
58. mid_id
59. from dws_uv_detail_daycount
60. where dt>=date_add(next_day('$do_date','MO'),-7*2)
61. and dt<= date_add(next_day('$do_date','MO'),-7-1)
62. group by mid_id
63. )last_wk
64. on current_wk.mid_id=last_wk.mid_id
65. where last_wk.mid_id is null;
66.
67.
68. insert into table ads_wastage_count
69. select
70. '$do_date',
71. count(*)
72. from dwt_uv_topic
73. where login_date_last<=date_add('$do_date',-7);
74.
75.
76. insert into table ads_user_retention_day_rate
77. select
78. '$do_date',
79. date_add('$do_date',-3),
80. 3,
81. sum(if(login_date_first=date_add('$do_date',-3) and
82. login_date_last='$do_date',1,0)),
83. sum(if(login_date_first=date_add('$do_date',-3),1,0)),
84. sum(if(login_date_first=date_add('$do_date',-3) and
85. login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',
86. -3),1,0))*100
87. from dwt_uv_topic
88. union all
89. select
90. '$do_date',
91. date_add('$do_date',-2),
92. 2,
93. sum(if(login_date_first=date_add('$do_date',-2) and
94. login_date_last='$do_date',1,0)),
95. sum(if(login_date_first=date_add('$do_date',-2),1,0)),
96. sum(if(login_date_first=date_add('$do_date',-2) and
97. login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',
98. -2),1,0))*100
99. from dwt_uv_topic
100. union all
101. select
102. '$do_date',
103. date_add('$do_date',-1),
104. 1,
105. sum(if(login_date_first=date_add('$do_date',-1) and
106. login_date_last='$do_date',1,0)),
107. sum(if(login_date_first=date_add('$do_date',-1),1,0)),
108. sum(if(login_date_first=date_add('$do_date',-1) and
109. login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',
110. -1),1,0))*100
111. from dwt_uv_topic;
112.
113.
114. insert into table ads_continuity_wk_count
115. select
116. '$do_date',
117. concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_d
118. ate','MO'),-1)),
119. count(*)
120. from
121. (
122. select
123. mid_id
124. from
125. (
126. select
127. mid_id
128. from dws_uv_detail_daycount
129. where dt>=date_add(next_day('$do_date','monday'),-7)
130. and dt<=date_add(next_day('$do_date','monday'),-1)
131. group by mid_id
132. union all
133. select
134. mid_id
135. from dws_uv_detail_daycount
136. where dt>=date_add(next_day('$do_date','monday'),-7*2)
137. and dt<=date_add(next_day('$do_date','monday'),-7-1)
138. group by mid_id
139. union all
140. select
141. mid_id
142. from dws_uv_detail_daycount
143. where dt>=date_add(next_day('$do_date','monday'),-7*3)
144. and dt<=date_add(next_day('$do_date','monday'),-7*2-1)
145. group by mid_id
146. )t1
147. group by mid_id
148. having count(*)=3
149. )t2;
150.
151.
152. insert into table ads_continuity_uv_count
153. select
154. '$do_date',
155. concat(date_add('$do_date',-6),'_','$do_date'),
156. count(*)
157. from
158. (
159. select mid_id
160. from
161. (
162. select mid_id
163. from
164. (
165. select
166. mid_id,
167. date_sub(dt,rank) date_dif
168. from
169. (
170. select
171. mid_id,
172. dt,
173. rank() over(partition by mid_id order by dt) rank
174. from dws_uv_detail_daycount
175. where dt>=date_add('$do_date',-6) and dt<='$do_date'
176. )t1
177. )t2
178. group by mid_id,date_dif
179. having count(*)>=3
180. )t3
181. group by mid_id
182. )t4;
183.
184.
185. insert into table ads_user_topic
186. select
187. '$do_date',
188. sum(if(login_date_last='$do_date',1,0)),
189. sum(if(login_date_first='$do_date',1,0)),
190. sum(if(payment_date_first='$do_date',1,0)),
191. sum(if(payment_count>0,1,0)),
192. count(*),
193. sum(if(login_date_last='$do_date',1,0))/count(*),
194. sum(if(payment_count>0,1,0))/count(*),
195. sum(if(login_date_first='$do_date',1,0))/sum(if(login_date_last='$do_date',1,
196. 0))
197. from dwt_user_topic;
198.
199.
200. insert into table ads_user_action_convert_day
201. select
202. '$do_date',
203. uv.day_count,
204. ua.cart_count,
205. ua.cart_count/uv.day_count*100 visitor2cart_convert_ratio,
206. ua.order_count,
207. ua.order_count/ua.cart_count*100 visitor2order_convert_ratio,
208. ua.payment_count,
209. ua.payment_count/ua.order_count*100 order2payment_convert_ratio
210. from
211. (
212. select
213. '$do_date' dt,
214. sum(if(cart_count>0,1,0)) cart_count,
215. sum(if(order_count>0,1,0)) order_count,
216. sum(if(payment_count>0,1,0)) payment_count
217. from dws_user_action_daycount
218. where dt='$do_date'
219. )ua join ads_uv_count uv on uv.dt=ua.dt;
220.
221.
222. insert into table ads_product_info
223. select
224. '$do_date' dt,
225. sku_num,
226. spu_num
227. from
228. (
229. select
230. '$do_date' dt,
231. count(*) sku_num
232. from
233. dwt_sku_topic
234. ) tmp_sku_num
235. join
236. (
237. select
238. '$do_date' dt,
239. count(*) spu_num
240. from
241. (
242. select
243. spu_id
244. from
245. dwt_sku_topic
246. group by
247. spu_id
248. ) tmp_spu_id
249. ) tmp_spu_num
250. on tmp_sku_num.dt=tmp_spu_num.dt;
251.
252.
253. insert into table ads_product_sale_topN
254. select
255. '$do_date',
256. sku_id,
257. payment_amount
258. from dws_sku_action_daycount
259. where dt='$do_date'
260. order by payment_amount desc
261. limit 10;
262.
263.
264. insert into table ads_product_favor_topN
265. select
266. '$do_date',
267. sku_id,
268. favor_count
269. from dws_sku_action_daycount
270. where dt='$do_date'
271. order by favor_count
272. limit 10;
273.
274.
275. insert into table ads_product_cart_topN
276. select
277. '$do_date' dt,
278. sku_id,
279. cart_num
280. from dws_sku_action_daycount
281. where dt='$do_date'
282. order by cart_num
283. limit 10;
284.
285.
286. insert into table ads_product_refund_topN
287. select
288. '$do_date',
289. sku_id,
290. refund_last_30d_count/payment_last_30d_count*100 refund_ratio
291. from dwt_sku_topic
292. order by refund_ratio desc
293. limit 10;
294.
295.
296. insert into table ads_appraise_bad_topN
297. select
298. '$do_date' dt,
299. sku_id,
300. appraise_bad_count/(appraise_bad_count+appraise_good_count+appraise_mid_coun
301. t+appraise_default_count)*100 appraise_bad_ratio
302. from dws_sku_action_daycount
303. where dt='$do_date'
304. order by appraise_bad_ratio desc
305. limit 10;
306.
307.
308. insert into table ads_order_daycount
309. select
310. '$do_date',
311. sum(order_count),
312. sum(order_amount),
313. sum(if(order_count>0,1,0))
314. from dws_user_action_daycount
315. where dt='$do_date';
316.
317.
318. insert into table ads_payment_daycount
319. select
320. tmp_payment.dt,
321. tmp_payment.payment_count,
322. tmp_payment.payment_amount,
323. tmp_payment.payment_user_count,
324. tmp_skucount.payment_sku_count,
325. tmp_time.payment_avg_time
326. from
327. (
328. select
329. '$do_date' dt,
330. sum(payment_count) payment_count,
331. sum(payment_amount) payment_amount,
332. sum(if(payment_count>0,1,0)) payment_user_count
333. from dws_user_action_daycount
334. where dt='$do_date'
335. )tmp_payment
336. join
337. (
338. select
339. '$do_date' dt,
340. sum(if(payment_count>0,1,0)) payment_sku_count
341. from dws_sku_action_daycount
342. where dt='$do_date'
343. )tmp_skucount on tmp_payment.dt=tmp_skucount.dt
344. join
345. (
346. select
347. '$do_date' dt,
348. sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60
349. payment_avg_time
350. from dwd_fact_order_info
351. where dt='$do_date'
352. and payment_time is not null
353. )tmp_time on tmp_payment.dt=tmp_time.dt;
354.
355.
356. insert into table ads_sale_tm_category1_stat_mn
357. select
358. mn.sku_tm_id,
359. mn.sku_category1_id,
360. mn.sku_category1_name,
361. sum(if(mn.order_count>=1,1,0)) buycount,
362. sum(if(mn.order_count>=2,1,0)) buyTwiceLast,
363. sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0))
364. buyTwiceLastRatio,
365. sum(if(mn.order_count>=3,1,0)) buy3timeLast ,
366. sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0))
367. buy3timeLastRatio ,
368. date_format('$do_date' ,'yyyy-MM') stat_mn,
369. '$do_date' stat_date
370. from
371. (
372. select
373. user_id,
374. sd.sku_tm_id,
375. sd.sku_category1_id,
376. sd.sku_category1_name,
377. sum(order_count) order_count
378. from dws_sale_detail_daycount sd
379. where date_format(dt,'yyyy-MM')=date_format('$do_date' ,'yyyy-MM')
380. group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name
381. ) mn
382. group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
383. "
384. $hive -e "$sql"
385.
复制代码
2)增加脚本执行权限
1. chmod 770 dwt_to_ads.sh
3)执行脚本导入数据
1. dwt_to_ads.sh 2020-03-10
4)查看导入数据