今日内容:
1.1 销售主题的日统计宽表
可分析的主要指标有:销售收入、平台收入、配送成交额、小程序成交额、安卓APP成交额、苹果APP成交额、PC商城成交额、订单量、参评单量、差评单量、配送单量、退款单量、小程序订单量、安卓APP订单量、苹果APP订单量、PC商城订单量。
维度有:日期、城市、商圈、店铺、品牌、大类、中类、小类。
维度组合:
日期: 日
日期 + 城市
日期 + 城市 + 商圈
日期 + 城市 + 商圈 + 店铺
日期 + 品牌
日期 + 大类
日期 + 大类 + 中类
日期 + 大类 + 中类 + 小类
16 * 8 = 128 个需求指标结果
分析, 当前需求统计的这些维度 和 指标, 需要涉及到那些表, 以及涉及到那些字段呢?
维度字段:
日期: dwb_order_detail.dt
城市: dwb_shop_detail: city_id 和 city_name
商圈: dwb_shop_detail: trade_area_id 和 trade_area_name
店铺: dwb_shop_detail: id 和 store_name
品牌: dwb_goods_detail: brand_id 和 brand_name
大类: dwb_goods_detail: max_class_id 和 max_class_name
中类: dwb_goods_detail: mid_class_id 和 mid_class_name
小类: dwb_goods_detail: min_class_id 和 min_class_name
指标字段:
订单量相关指标: dwb_order_detail.order_id
订单销售收入(销售收入, 小程序, 安卓, 苹果, pc端):dwb_order_detail.order_amount
平台收入: dwb_order_detail.plat_fee
配送费: wb_order_detail.delivery_fee
涉及表:
订单明细宽表(当前主题的事实表): dwb_order_detail (事实表)
店铺明细宽表: dwb_shop_detail (维度表)
商品明细宽表: dwb_goods_detail (维度表)
关联条件:
订单表 和 店铺表:
订单明细宽表.store_id = 店铺明细宽表.id
订单表 和 商品表:
订单明细宽表.goods_id = 商品明细宽表.id
思考: 当前这个是三种数仓模型那一种呢? 星型模型
是否需要过滤一些操作呢?
1- 保证必须是支付状态: is_pay = 1
2- 保证订单状态: order_state 不能是 1(已下单, 没有付款) 和 7 (已取消)
首先, 先创建DWS层库 和 表(销售主题日统计宽表) (在HIVE中重新建表)
-- 创建库:
create database if not exists bj59_yp_dws_jiale;
-- 创建表(指标字段 + 维度字段 + 经验字段 ):
DROP TABLE IF EXISTS bj59_yp_dws_jiale.dws_sale_daycount;
CREATE TABLE bj59_yp_dws_jiale.dws_sale_daycount(
--dt STRING,
province_id string COMMENT '省份id',
province_name string COMMENT '省份名称',
city_id string COMMENT '城市id',
city_name string COMMENT '城市name',
trade_area_id string COMMENT '商圈id',
trade_area_name string COMMENT '商圈名称',
store_id string COMMENT '店铺的id',
store_name string COMMENT '店铺名称',
brand_id string COMMENT '品牌id',
brand_name string COMMENT '品牌名称',
max_class_id string COMMENT '商品大类id',
max_class_name string COMMENT '大类名称',
mid_class_id string COMMENT '中类id',
mid_class_name string COMMENT '中类名称',
min_class_id string COMMENT '小类id',
min_class_name string COMMENT '小类名称',
-- 用于标记数据结果是按照那个维度来统计的一个经验字段
group_type string COMMENT '分组类型:store,trade_area,city,brand,min_class,mid_class,max_class, all',
-- =======日统计=======
-- 销售收入
sale_amt DECIMAL(38,2) COMMENT '销售收入',
-- 平台收入
plat_amt DECIMAL(38,2) COMMENT '平台收入',
-- 配送成交额
deliver_sale_amt DECIMAL(38,2) COMMENT '配送成交额',
-- 小程序成交额
mini_app_sale_amt DECIMAL(38,2) COMMENT '小程序成交额',
-- 安卓APP成交额
android_sale_amt DECIMAL(38,2) COMMENT '安卓APP成交额',
-- 苹果APP成交额
ios_sale_amt DECIMAL(38,2) COMMENT '苹果APP成交额',
-- PC商城成交额
pcweb_sale_amt DECIMAL(38,2) COMMENT 'PC商城成交额',
-- 成交单量
order_cnt BIGINT COMMENT '成交单量',
-- 参评单量
eva_order_cnt BIGINT COMMENT '参评单量comment=>cmt',
-- 差评单量
bad_eva_order_cnt BIGINT COMMENT '差评单量negtive-comment=>ncmt',
-- 配送成交单量
deliver_order_cnt BIGINT COMMENT '配送单量',
-- 退款单量
refund_order_cnt BIGINT COMMENT '退款单量',
-- 小程序成交单量
miniapp_order_cnt BIGINT COMMENT '小程序成交单量',
-- 安卓APP订单量
android_order_cnt BIGINT COMMENT '安卓APP订单量',
-- 苹果APP订单量
ios_order_cnt BIGINT COMMENT '苹果APP订单量',
-- PC商城成交单量
pcweb_order_cnt BIGINT COMMENT 'PC商城成交单量'
)
COMMENT '销售主题日统计宽表'
PARTITIONED BY(dt STRING)
ROW format delimited fields terminated BY '\t'
stored AS orc tblproperties ('orc.compress' = 'SNAPPY');
- SQL实现操作
-- 第一步: 对数据进行去重操作, 通过row_number实现
insert into hive.bj59_yp_dws_jiale.dws_sale_daycount
with t1 as (
select
-- 维度字段:
o.dt, -- 日期维度
s.province_id ,
s.province_name ,
s.city_id ,
s.city_name , -- 城市维度
s.trade_area_id ,
s.trade_area_name , -- 商圈维度
o.store_id ,
s.store_name , -- 店铺维度
g.brand_id ,
g.brand_name, -- 品牌维度
g.max_class_id ,
g.max_class_name , -- 大类
g.mid_class_id ,
g.mid_class_name , -- 中类
g.min_class_id ,
g.min_class_name , -- 小类
-- 指标字段
o.order_id , -- 订单id 计算订单量相关指标
o.order_amount, -- 订单销售收入
o.total_price , -- 商品销售收入
o.plat_fee , -- 平台分润
o.delivery_fee , -- 配送费用
-- 用于判断的字段
o.order_from , -- 渠道(小程序, 安卓, 苹果, pc)
o.evaluation_id , -- 评价表 id , 判断是否有参评
o.delievery_id , -- 配送表id, 判断是否有配送
o.geval_scores , -- 综合评分: 10分制
o.refund_id , -- 退款表id,判断是否有退款
-- 进行去重操作:
-- 计算 日期, 日期+城市, 日期+城市+商圈 , 日期+城市+商圈+店铺
row_number() over(partition by o.order_id) as order_rn,
-- 计算 日期 + 品牌 : 第一个去重计算 订单量 第二个去重 计算 销售额
row_number() over(partition by o.order_id,g.brand_id) as brand_rn,
row_number() over(partition by o.order_id,o.goods_id ,g.brand_id) as brand_goods_rn,
-- 计算 日期 + 大类
row_number() over(partition by o.order_id,g.max_class_id) as max_class_rn,
row_number() over(partition by o.order_id,o.goods_id ,g.max_class_id) as max_class_goods_rn,
-- 计算 日期 + 大类 + 中类
row_number() over(partition by o.order_id,g.max_class_id,g.mid_class_id) as mid_class_rn,
row_number() over(partition by o.order_id,o.goods_id ,g.max_class_id,g.mid_class_id) as mid_class_goods_rn,
-- 计算 日期 + 大类 + 中类 + 小类
row_number() over(partition by o.order_id,g.max_class_id,g.mid_class_id,g.min_class_id) as min_class_rn,
row_number() over(partition by o.order_id,o.goods_id ,g.max_class_id,g.mid_class_id,g.min_class_id) as min_class_goods_rn
from hive.bj59_yp_dwb_jiale.dwb_order_detail o
left join hive.bj59_yp_dwb_jiale.dwb_shop_detail s on o.store_id = s.id
left join hive.bj59_yp_dwb_jiale.dwb_goods_detail g on o.goods_id = g.id
where o.is_pay = 1 and o.order_state not in(1,7)
)
select
-- 维度字段
province_id,
province_name ,
city_id ,
city_name ,
trade_area_id ,
trade_area_name ,
store_id ,
store_name ,
brand_id ,
brand_name ,
max_class_id ,
max_class_name,
mid_class_id ,
mid_class_name,
min_class_id ,
min_class_name,
-- group_type字段的值, 需要根据不同的维度分组, 标上不同的值
case
when grouping(store_id) = 0 then 'store'
when grouping(trade_area_id) = 0 then 'trade_area'
when grouping(city_id) = 0 then 'city'
when grouping(brand_id) = 0 then 'brand'
when grouping(min_class_id) = 0 then 'min_class'
when grouping(mid_class_id) = 0 then 'mid_class'
when grouping(max_class_id) = 0 then 'max_class'
when grouping(dt) = 0 then 'all'
else 'other'
end as group_type,
-- 销售收入:
case
when grouping(store_id) = 0 then sum( if(order_rn = 1 and store_id is not null,coalesce(order_amount,0),0) )
when grouping(trade_area_id) = 0 then sum( if(order_rn = 1 and trade_area_id is not null,coalesce(order_amount,0),0) )
when grouping(city_id) = 0 then sum( if(order_rn = 1 and city_id is not null,coalesce(order_amount,0),0) )
when grouping(brand_id) = 0 then sum( if( brand_goods_rn = 1 and brand_id is not null,coalesce(total_price,0),0) )
when grouping(min_class_id) = 0 then sum( if( min_class_goods_rn = 1 and min_class_id is not null,coalesce(total_price,0),0) )
when grouping(mid_class_id) = 0 then sum( if( mid_class_goods_rn = 1 and mid_class_id is not null,coalesce(total_price,0),0) )
when grouping(max_class_id) = 0 then sum( if( max_class_goods_rn = 1 and max_class_id is not null,coalesce(total_price,0),0) )
when grouping(dt) = 0 then sum( if(order_rn = 1 and dt is not null,coalesce(order_amount,0),0) )
else NULL
end as sale_amt,
-- 平台收入:
case
when grouping(store_id) = 0 then sum( if(order_rn = 1 and store_id is not null,coalesce(plat_fee,0),0) )
when grouping(trade_area_id) = 0 then sum( if(order_rn = 1 and trade_area_id is not null,coalesce(plat_fee,0),0) )
when grouping(city_id) = 0 then sum( if(order_rn = 1 and city_id is not null,coalesce(plat_fee,0),0) )
when grouping(brand_id) = 0 then null
when grouping(min_class_id) = 0 then null
when grouping(mid_class_id) = 0 then null
when grouping(max_class_id) = 0 then null
when grouping(dt) = 0 then sum( if(order_rn = 1 and dt is not null,coalesce(plat_fee,0),0) )
else NULL
end as plat_amt,
-- 配送成交额
case
when grouping(store_id) = 0 then sum( if(order_rn = 1 and store_id is not null,coalesce(delivery_fee,0),0) )
when grouping(trade_area_id) = 0 then sum( if(order_rn = 1 and trade_area_id is not null,coalesce(delivery_fee,0),0) )
when grouping(city_id) = 0 then sum( if(order_rn = 1 and city_id is not null,coalesce(delivery_fee,0),0) )
when grouping(brand_id) = 0 then null
when grouping(min_class_id) = 0 then null
when grouping(mid_class_id) = 0 then null
when grouping(max_class_id) = 0 then null
when grouping(dt) = 0 then sum( if(order_rn = 1 and dt is not null,coalesce(delivery_fee,0),0) )
else NULL
end as deliver_sale_amt,
-- 小程序成交额:
case
when grouping(store_id) = 0
then sum(
if(
order_rn = 1 and store_id is not null and order_from = 'miniapp',
coalesce(order_amount,0),
0
)
)
when grouping(trade_area_id) = 0
then sum(
if(
order_rn = 1 and trade_area_id is not null and order_from = 'miniapp',
coalesce(order_amount,0),
0
)
)
when grouping(city_id) = 0
then sum(
if(
order_rn = 1 and city_id is not null and order_from = 'miniapp',
coalesce(order_amount,0),
0
)
)
when grouping(brand_id) = 0
then sum(
if(
brand_goods_rn = 1 and brand_id is not null and order_from = 'miniapp',
coalesce(total_price,0),
0
)
)
when grouping(min_class_id) = 0
then sum(
if(
min_class_goods_rn = 1 and min_class_id is not null and order_from = 'miniapp',
coalesce(total_price,0),
0
)
)
when grouping(mid_class_id) = 0
then sum(
if(
mid_class_goods_rn = 1 and mid_class_id is not null and order_from = 'miniapp',
coalesce(total_price,0),
0
)
)
when grouping(max_class_id) = 0
then sum(
if(
max_class_goods_rn = 1 and max_class_id is not null and order_from = 'miniapp',
coalesce(total_price,0),
0
)
)
when grouping(dt) = 0
then sum(
if(
order_rn = 1 and dt is not null and order_from = 'miniapp',
coalesce(order_amount,0),
0
)
)
else NULL
end as mini_app_sale_amt,
-- 安卓成交额
case
when grouping(store_id) = 0
then sum(
if(
order_rn = 1 and store_id is not null and order_from = 'android',
coalesce(order_amount,0),
0
)
)
when grouping(trade_area_id) = 0
then sum(
if(
order_rn = 1 and trade_area_id is not null and order_from = 'android',
coalesce(order_amount,0),
0
)
)
when grouping(city_id) = 0
then sum(
if(
order_rn = 1 and city_id is not null and order_from = 'android',
coalesce(order_amount,0),
0
)
)
when grouping(brand_id) = 0
then sum(
if(
brand_goods_rn = 1 and brand_id is not null and order_from = 'android',
coalesce(total_price,0),
0
)
)
when grouping(min_class_id) = 0
then sum(
if(
min_class_goods_rn = 1 and min_class_id is not null and order_from = 'android',
coalesce(total_price,0),
0
)
)
when grouping(mid_class_id) = 0
then sum(
if(
mid_class_goods_rn = 1 and mid_class_id is not null and order_from = 'android',
coalesce(total_price,0),
0
)
)
when grouping(max_class_id) = 0
then sum(
if(
max_class_goods_rn = 1 and max_class_id is not null and order_from = 'android',
coalesce(total_price,0),
0
)
)
when grouping(dt) = 0
then sum(
if(
order_rn = 1 and dt is not null and order_from = 'android',
coalesce(order_amount,0),
0
)
)
else NULL
end as android_sale_amt,
-- 苹果成交额
case
when grouping(store_id) = 0
then sum(
if(
order_rn = 1 and store_id is not null and order_from = 'ios',
coalesce(order_amount,0),
0
)
)
when grouping(trade_area_id) = 0
then sum(
if(
order_rn = 1 and trade_area_id is not null and order_from = 'ios',
coalesce(order_amount,0),
0
)
)
when grouping(city_id) = 0
then sum(
if(
order_rn = 1 and city_id is not null and order_from = 'ios',
coalesce(order_amount,0),
0
)
)
when grouping(brand_id) = 0
then sum(
if(
brand_goods_rn = 1 and brand_id is not null and order_from = 'ios',
coalesce(total_price,0),
0
)
)
when grouping(min_class_id) = 0
then sum(
if(
min_class_goods_rn = 1 and min_class_id is not null and order_from = 'ios',
coalesce(total_price,0),
0
)
)
when grouping(mid_class_id) = 0
then sum(
if(
mid_class_goods_rn = 1 and mid_class_id is not null and order_from = 'ios',
coalesce(total_price,0),
0
)
)
when grouping(max_class_id) = 0
then sum(
if(
max_class_goods_rn = 1 and max_class_id is not null and order_from = 'ios',
coalesce(total_price,0),
0
)
)
when grouping(dt) = 0
then sum(
if(
order_rn = 1 and dt is not null and order_from = 'ios',
coalesce(order_amount,0),
0
)
)
else NULL
end as ios_sale_amt,
-- PC成交额
case
when grouping(store_id) = 0
then sum(
if(
order_rn = 1 and store_id is not null and order_from = 'pcweb',
coalesce(order_amount,0),
0
)
)
when grouping(trade_area_id) = 0
then sum(
if(
order_rn = 1 and trade_area_id is not null and order_from = 'pcweb',
coalesce(order_amount,0),
0
)
)
when grouping(city_id) = 0
then sum(
if(
order_rn = 1 and city_id is not null and order_from = 'pcweb',
coalesce(order_amount,0),
0
)
)
when grouping(brand_id) = 0
then sum(
if(
brand_goods_rn = 1 and brand_id is not null and order_from = 'pcweb',
coalesce(total_price,0),
0
)
)
when grouping(min_class_id) = 0
then sum(
if(
min_class_goods_rn = 1 and min_class_id is not null and order_from = 'pcweb',
coalesce(total_price,0),
0
)
)
when grouping(mid_class_id) = 0
then sum(
if(
mid_class_goods_rn = 1 and mid_class_id is not null and order_from = 'pcweb',
coalesce(total_price,0),
0
)
)
when grouping(max_class_id) = 0
then sum(
if(
max_class_goods_rn = 1 and max_class_id is not null and order_from = 'pcweb',
coalesce(total_price,0),
0
)
)
when grouping(dt) = 0
then sum(
if(
order_rn = 1 and dt is not null and order_from = 'pcweb',
coalesce(order_amount,0),
0
)
)
else NULL
end as pcweb_sale_amt,
--- 订单量相关指标:
-- 成交单量:
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null,
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null,
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null,
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null,
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null,
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null,
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null,
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null,
order_id,
NULL
)
)
else NULL
end as order_cnt,
-- 参评单量:
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null and evaluation_id is not null,
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null and evaluation_id is not null,
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null and evaluation_id is not null,
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null and evaluation_id is not null,
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null and evaluation_id is not null,
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null and evaluation_id is not null ,
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null and evaluation_id is not null,
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null and evaluation_id is not null,
order_id,
NULL
)
)
else NULL
end as eva_order_cnt,
-- 差评单量:
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null and evaluation_id is not null and geval_scores < 6,
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null and evaluation_id is not null and geval_scores < 6,
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null and evaluation_id is not null and geval_scores < 6,
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null and evaluation_id is not null and geval_scores < 6,
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null and evaluation_id is not null and geval_scores < 6,
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null and evaluation_id is not null and geval_scores < 6,
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null and evaluation_id is not null and geval_scores < 6,
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null and evaluation_id is not null and geval_scores < 6,
order_id,
NULL
)
)
else NULL
end as bad_eva_order_cnt,
-- 配送成交单量
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null and delievery_id is not null,
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null and delievery_id is not null,
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null and delievery_id is not null,
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null and delievery_id is not null,
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null and delievery_id is not null,
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null and delievery_id is not null,
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null and delievery_id is not null,
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null and delievery_id is not null,
order_id,
NULL
)
)
else NULL
end as deliver_order_cnt,
-- 退款成交单量
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null and refund_id is not null,
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null and refund_id is not null,
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null and refund_id is not null,
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null and refund_id is not null,
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null and refund_id is not null,
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null and refund_id is not null,
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null and refund_id is not null,
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null and refund_id is not null,
order_id,
NULL
)
)
else NULL
end as refund_order_cnt,
-- 小程序成交量
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null and order_from = 'miniapp',
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null and order_from = 'miniapp',
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null and order_from = 'miniapp',
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null and order_from = 'miniapp',
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null and order_from = 'miniapp',
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null and order_from = 'miniapp',
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null and order_from = 'miniapp',
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null and order_from = 'miniapp',
order_id,
NULL
)
)
else NULL
end as miniapp_order_cnt,
-- 安卓成交量
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null and order_from = 'android',
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null and order_from = 'android',
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null and order_from = 'android',
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null and order_from = 'android',
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null and order_from = 'android',
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null and order_from = 'android',
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null and order_from = 'android',
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null and order_from = 'android',
order_id,
NULL
)
)
else NULL
end as android_order_cnt,
-- 苹果成交量
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null and order_from = 'ios',
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null and order_from = 'ios',
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null and order_from = 'ios',
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null and order_from = 'ios',
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null and order_from = 'ios',
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null and order_from = 'ios',
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null and order_from = 'ios',
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null and order_from = 'ios',
order_id,
NULL
)
)
else NULL
end as ios_order_cnt,
-- pc成交量
case
when grouping(store_id) = 0
then count(
if(
order_rn = 1 and store_id is not null and order_from = 'pcweb',
order_id,
NULL
)
)
when grouping(trade_area_id) = 0
then count(
if(
order_rn = 1 and trade_area_id is not null and order_from = 'pcweb',
order_id,
NULL
)
)
when grouping(city_id) = 0
then count(
if(
order_rn = 1 and city_id is not null and order_from = 'pcweb',
order_id,
NULL
)
)
when grouping(brand_id) = 0
then count(
if(
brand_rn = 1 and brand_id is not null and order_from = 'pcweb',
order_id,
NULL
)
)
when grouping(min_class_id) = 0
then count(
if(
min_class_rn = 1 and min_class_id is not null and order_from = 'pcweb',
order_id,
NULL
)
)
when grouping(mid_class_id) = 0
then count(
if(
mid_class_rn = 1 and mid_class_id is not null and order_from = 'pcweb',
order_id,
NULL
)
)
when grouping(max_class_id) = 0
then count(
if(
max_class_rn = 1 and max_class_id is not null and order_from = 'pcweb',
order_id,
NULL
)
)
when grouping(dt) = 0
then count(
if(
order_rn = 1 and dt is not null and order_from = 'pcweb',
order_id,
NULL
)
)
else NULL
end as pcweb_order_cnt,
dt
from t1
group by grouping sets(
dt,
(dt,province_id,province_name,city_id,city_name),
(dt,province_id,province_name,city_id,city_name,trade_area_id,trade_area_name),
(dt,province_id,province_name,city_id,city_name,trade_area_id,trade_area_name,store_id,store_name),
(dt,brand_id,brand_name),
(dt,max_class_id,max_class_name),
(dt,max_class_id,max_class_name,mid_class_id,mid_class_name),
(dt,max_class_id,max_class_name,mid_class_id,mid_class_name,min_class_id,min_class_name)
)
1.2 商品主题统计宽表
主要指标有:下单次数、下单件数、下单金额、被支付次数、被支付件数、被支付金额、被退款次数、被退款件数、被退款金额、被加入购物车次数、被加入购物车件数、被收藏次数、好评数、中评数、差评数。<br /> 维度有:商品、日期。
创建商品主题统计宽表(HIVE执行)
create table xxx.dws_sku_daycount ( dt STRING, sku_id string comment 'sku_id', sku_name string comment '商品名称', order_count bigint comment '被下单次数', order_num bigint comment '被下单件数', order_amount decimal(38,2) comment '被下单金额', payment_count bigint comment '被支付次数', payment_num bigint comment '被支付件数', payment_amount decimal(38,2) comment '被支付金额', refund_count bigint comment '被退款次数', refund_num bigint comment '被退款件数', refund_amount decimal(38,2) comment '被退款金额', cart_count bigint comment '被加入购物车次数', cart_num bigint comment '被加入购物车件数', favor_count bigint comment '被收藏次数', evaluation_good_count bigint comment '好评数', evaluation_mid_count bigint comment '中评数', evaluation_bad_count bigint comment '差评数' ) COMMENT '每日商品行为' --PARTITIONED BY(dt STRING) ROW format delimited fields terminated BY '\t' stored AS orc tblproperties ('orc.compress' = 'SNAPPY');
需求分析: ```properties 主要指标有: 下单次数、 下单件数、 下单金额、 被支付次数、 被支付件数、 被支付金额、 被退款次数、 被退款件数、 被退款金额、 被加入购物车次数、 被加入购物车件数、 被收藏次数、 好评数、 中评数、 差评数。
维度有: 商品 + 日期。
下单次数、 下单件数、 下单金额: 表: 订单明细宽表 dwb_order_detail 涉及字段: 维度字段: 商品维度: goods_id, goods_name 日期: dt 指标字段: 次数: 产生了多少个订单 order_id 件数: buy_num 金额: total_price
被支付次数、被支付件数、被支付金额: 表: 订单明细宽表 dwb_order_detail 过滤出已经支付的订单数据: is_pay = 1 and order_state not in(1,7) 涉及字段: 维度字段: 商品维度: goods_id, goods_name 日期: dt 指标字段: 次数: 产生了多少个订单 order_id 件数: buy_num 金额: total_price
被退款次数、被退款件数、被退款金额: 表: 订单明细宽表 dwb_order_detail 过滤出已经退款的订单数据: refund_id is not null and refund_state = 5 (退款完成) 涉及字段: 维度字段: 商品维度: goods_id, goods_name 日期: dt 指标字段: 次数: 产生了多少个订单 order_id 件数: buy_num 金额: total_price
被加入购物车次数、被加入购物车件数: 表: dwd.fact_shop_cart(购物车表) 在此表放置的数据, 都是购物车的相关数据 和 dwb_goods_detail(商品明细宽表) 关联条件: c.goods_id = g.goods_id and c.end_date = ‘9999-99-99’ 涉及字段: 维度字段: 商品维度: c.goods_id 和 g.goods_name 时间维度: c.create_time (此处不要使用 start_date, 因为这个起始时间, 有可能是采集数据时间, 并不是数据创建时间) 指标字段: 次数: c.id 件数: c.buy_num
被收藏次数: 表: dwd.fact_goods_collect(收藏表) 和 dwb_goods_detail(商品明细宽表) 关联条件: c.goods_id = g.goods_id and c.end_date = ‘9999-99-99’ 涉及字段: 维度字段: 商品维度: c.goods_id , g.goods_name 时间维度: c.create_time
维度字段:
次数: c.id
好评数、中评数、差评数:
表: dwd.fact_goods_evaluation_detail(商品评价表) 和 dwb_goods_detail(商品明细宽表)
关联条件:
eva.goods_id = g.goods_id and eva.end_date = ‘9999-99-99’
判断好评 中评 差评呢?
eva.geval_scores_goods 10分制
好评: 大于 8分, 中评 6~8 差评 6以下
涉及字段:
涉及维度:
商品维度: eva.goods_id 和 g.goods_name
时间维度: eva.create_time
指标字段:
数量: eva.id
- SQL实现:
- 目前: 将每一个指定的统计结果, 都放置在一个临表:
```sql
-- 对订单表进行指标计算的时候, 需要先去重(由于我们是基于商品统计, 需要保证每笔订单中有不同的商品, 相同商品去除掉)
with t1 as (
select
-- 维度字段:
dt, -- 订单生成时间
pay_time, --支付时间
apply_date, -- 退款时间
goods_id,
goods_name,
-- 指标字段
order_id, -- 计算次数
buy_num , -- 商品数量, 计算件数
total_price , -- 商品金额, 计算金额
-- 判断字段:
is_pay , -- 是否支付, 为1表示支付
order_state , -- 订单状态, 不能为 1和7
refund_id , -- 退款id 不能为null
refund_state , -- 退款状态 必须为 5
-- 去重操作
row_number() over(partition by order_id,goods_id) as rn1
from hive.bj59_yp_dwb_jiale.dwb_order_detail
),
-- 下单次数、 下单件数、 下单金额:
order_goods_1 as (
select
dt,
goods_id,
goods_name,
count(order_id) as order_count,
sum(buy_num) as order_num,
sum(total_price) as order_amount
from t1 where rn1 =1
group by goods_id,goods_name,dt
),
-- 被支付次数、被支付件数、被支付金额:
payment_2 AS (
select
substr(pay_time,1,10) as dt,
goods_id,
goods_name,
count(order_id) as payment_count,
sum(buy_num) as payment_num,
sum(total_price) as payment_amount
from t1 where rn1 =1 and is_pay = 1 and order_state not in(1,7)
group by substr(pay_time,1,10),goods_id,goods_name
),
-- 被退款的次数, 被退款的件数, 被退款的金额
refund_3 AS (
SELECT
substr(apply_date,1,10) AS dt ,
goods_id,
goods_name,
count(order_id) as refund_count,
sum(buy_num) as refund_num,
sum(total_price) as refund_amount
FROM t1 WHERE rn1 = 1 AND refund_id NOT NULL AND refund_state = 5
GROUP BY substr(apply_date,1,10), goods_id,goods_name
),
-- 被收藏次数
favor_4 as (
SELECT
substr(c.create_time,1,10) as dt,
c.goods_id,
g.goods_name,
count(c.id) AS favor_count
FROM hive.xxxx.fact_goods_collect c
JOIN hive.bj59_yp_dwb_jiale.dwb_goods_detail g
ON c.goods_id = g.goods_id and c.end_date = '9999-99-99'
GROUP BY substr(c.create_time,1,10),c.goods_id,g.goods_name
)
-- 加入购物车的次数和 件数
cart_5 as (
select
substr(c.create_time,1,10) as dt,
c.goods_id,
g.goods_name,
count(c.id) as cart_count,
sum(c.buy_num) as cart_num
from hive.xxxx.fact_shop_cart c
JOIN hive.bj59_yp_dwb_jiale.dwb_goods_detail g
ON c.goods_id = g.goods_id and c.end_date = '9999-99-99'
group by substr(c.create_time,1,10),c.goods_id,g.goods_name
)
eva_6 as (
select
substr(eva.create_time,1,10) as dt,
eva.goods_id,
g.goods_name,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods > 8,
eva.id,
null
)
) as evaluation_good_count,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods between 6 and 8,
eva.id,
null
)
) as evaluation_mid_count,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods < 6,
eva.id,
null
)
) as evaluation_bad_count
from hive.xxxx.fact_goods_evaluation_detail eva
join hive.bj59_yp_dwb_jiale.dwb_goods_detail g
on eva.goods_id = g.goods_id and eva.end_date = '9999-99-99'
group by substr(eva.create_time,1,10),eva.goods_id,g.goods_name
)
- 第二步: 将结果灌入到目标表: 需要将多个临表全部合并在一起, 灌入到目标表
-- 对订单表进行指标计算的时候, 需要先去重(由于我们是基于商品统计, 需要保证每笔订单中有不同的商品, 相同商品去除掉)
insert into hive.bj59_yp_dws_jiale.dws_sku_daycount
with t1 as (
select
-- 维度字段:
dt, -- 订单生成时间
pay_time, --支付时间
apply_date, -- 退款时间
goods_id,
goods_name,
-- 指标字段
order_id, -- 计算次数
buy_num , -- 商品数量, 计算件数
total_price , -- 商品金额, 计算金额
-- 判断字段:
is_pay , -- 是否支付, 为1表示支付
order_state , -- 订单状态, 不能为 1和7
refund_id , -- 退款id 不能为null
refund_state , -- 退款状态 必须为 5
-- 去重操作
row_number() over(partition by order_id,goods_id) as rn1
from hive.xxxx.dwb_order_detail
),
-- 下单次数、 下单件数、 下单金额:
order_goods_1 as (
select
dt,
goods_id,
goods_name,
count(order_id) as order_count,
sum(buy_num) as order_num,
sum(total_price) as order_amount
from t1 where rn1 =1
group by goods_id,goods_name,dt
),
-- 被支付次数、被支付件数、被支付金额:
payment_2 AS (
select
substr(pay_time,1,10) as dt,
goods_id,
goods_name,
count(order_id) as payment_count,
sum(buy_num) as payment_num,
sum(total_price) as payment_amount
from t1 where rn1 =1 and is_pay = 1 and order_state not in(1,7)
group by substr(pay_time,1,10),goods_id,goods_name
),
-- 被退款的次数, 被退款的件数, 被退款的金额
refund_3 AS (
SELECT
substr(apply_date,1,10) AS dt ,
goods_id,
goods_name,
count(order_id) as refund_count,
sum(buy_num) as refund_num,
sum(total_price) as refund_amount
FROM t1 WHERE rn1 = 1 AND refund_id is NOT NULL AND refund_state = 5
GROUP BY substr(apply_date,1,10), goods_id,goods_name
),
-- 被收藏次数
favor_4 as (
SELECT
substr(c.create_time,1,10) as dt,
c.goods_id,
g.goods_name,
count(c.id) AS favor_count
FROM hive.bj59_yp_dwd_jiale.fact_goods_collect c
JOIN hive.xxxx.dwb_goods_detail g
ON c.goods_id = g.id and c.end_date = '9999-99-99'
GROUP BY substr(c.create_time,1,10),c.goods_id,g.goods_name
),
-- 加入购物车的次数和 件数
cart_5 as (
select
substr(c.create_time,1,10) as dt,
c.goods_id,
g.goods_name,
count(c.id) as cart_count,
sum(c.buy_num) as cart_num
from hive.bj59_yp_dwd_jiale.fact_shop_cart c
JOIN hive.xxxx.dwb_goods_detail g
ON c.goods_id = g.id and c.end_date = '9999-99-99'
group by substr(c.create_time,1,10),c.goods_id,g.goods_name
),
eva_6 as (
select
substr(eva.create_time,1,10) as dt,
eva.goods_id,
g.goods_name,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods > 8,
eva.id,
null
)
) as evaluation_good_count,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods between 6 and 8,
eva.id,
null
)
) as evaluation_mid_count,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods < 6,
eva.id,
null
)
) as evaluation_bad_count
from hive.bj59_yp_dwd_jiale.fact_goods_evaluation_detail eva
join hive.xxxx.dwb_goods_detail g
on eva.goods_id = g.id and eva.end_date = '9999-99-99'
group by substr(eva.create_time,1,10),eva.goods_id,g.goods_name
),
t7 as (
select
coalesce(t1.dt,t2.dt,t3.dt,t4.dt,t5.dt,t6.dt) as dt,
coalesce(t1.goods_id,t2.goods_id,t3.goods_id,t4.goods_id,t5.goods_id,t6.goods_id) as sku_id,
coalesce(t1.goods_name,t2.goods_name,t3.goods_name,t4.goods_name,t5.goods_name,t6.goods_name) as sku_name,
coalesce(t1.order_count,0) as order_count,
coalesce(t1.order_num,0) as order_num,
coalesce(t1.order_amount,0) as order_amount,
coalesce(t2.payment_count,0) as payment_count,
coalesce(t2.payment_num,0) as payment_num,
coalesce(t2.payment_amount,0) as payment_amount,
coalesce(t3.refund_count,0) as refund_count,
coalesce(t3.refund_num,0) as refund_num,
coalesce(t3.refund_amount,0) as refund_amount,
coalesce(t5.cart_count,0) as cart_count,
coalesce(t5.cart_num,0) as cart_num,
coalesce(t4.favor_count,0) as favor_count,
coalesce(t6.evaluation_good_count,0) as evaluation_good_count,
coalesce(t6.evaluation_mid_count,0) as evaluation_mid_count,
coalesce(t6.evaluation_bad_count,0) as evaluation_bad_count
from order_goods_1 t1
full join payment_2 t2 on t1.dt = t2.dt and t1.goods_id = t2.goods_id
full join refund_3 t3 on t3.dt = t2.dt and t3.goods_id = t2.goods_id
full join favor_4 t4 on t4.dt = t3.dt and t4.goods_id = t3.goods_id
full join cart_5 t5 on t5.dt = t4.dt and t5.goods_id = t4.goods_id
full join eva_6 t6 on t6.dt = t5.dt and t6.goods_id = t5.goods_id
)
select
dt,
sku_id,
sku_name,
sum(order_count) as order_count,
sum(order_num) as order_num,
sum(order_amount) as order_amount,
sum(payment_count) as payment_count,
sum(payment_num) as payment_num,
sum(payment_amount) as payment_amount,
sum(refund_count) as refund_count,
sum(refund_num) as refund_num,
sum(refund_amount) as refund_amount,
sum(cart_count) as cart_count,
sum(cart_num) as cart_num,
sum(favor_count) as favor_count,
sum(evaluation_good_count) as evaluation_good_count,
sum(evaluation_mid_count) as evaluation_mid_count,
sum(evaluation_bad_count) as evaluation_bad_count
from t7
group by dt,sku_id,sku_name;-- 对订单表进行指标计算的时候, 需要先去重(由于我们是基于商品统计, 需要保证每笔订单中有不同的商品, 相同商品去除掉)
insert into hive.bj59_yp_dws_jiale.dws_sku_daycount
with t1 as (
select
-- 维度字段:
dt, -- 订单生成时间
pay_time, --支付时间
apply_date, -- 退款时间
goods_id,
goods_name,
-- 指标字段
order_id, -- 计算次数
buy_num , -- 商品数量, 计算件数
total_price , -- 商品金额, 计算金额
-- 判断字段:
is_pay , -- 是否支付, 为1表示支付
order_state , -- 订单状态, 不能为 1和7
refund_id , -- 退款id 不能为null
refund_state , -- 退款状态 必须为 5
-- 去重操作
row_number() over(partition by order_id,goods_id) as rn1
from hive.xxxx.dwb_order_detail
),
-- 下单次数、 下单件数、 下单金额:
order_goods_1 as (
select
dt,
goods_id,
goods_name,
count(order_id) as order_count,
sum(buy_num) as order_num,
sum(total_price) as order_amount
from t1 where rn1 =1
group by goods_id,goods_name,dt
),
-- 被支付次数、被支付件数、被支付金额:
payment_2 AS (
select
substr(pay_time,1,10) as dt,
goods_id,
goods_name,
count(order_id) as payment_count,
sum(buy_num) as payment_num,
sum(total_price) as payment_amount
from t1 where rn1 =1 and is_pay = 1 and order_state not in(1,7)
group by substr(pay_time,1,10),goods_id,goods_name
),
-- 被退款的次数, 被退款的件数, 被退款的金额
refund_3 AS (
SELECT
substr(apply_date,1,10) AS dt ,
goods_id,
goods_name,
count(order_id) as refund_count,
sum(buy_num) as refund_num,
sum(total_price) as refund_amount
FROM t1 WHERE rn1 = 1 AND refund_id is NOT NULL AND refund_state = 5
GROUP BY substr(apply_date,1,10), goods_id,goods_name
),
-- 被收藏次数
favor_4 as (
SELECT
substr(c.create_time,1,10) as dt,
c.goods_id,
g.goods_name,
count(c.id) AS favor_count
FROM hive.bj59_yp_dwd_jiale.fact_goods_collect c
JOIN hive.xxxx.dwb_goods_detail g
ON c.goods_id = g.id and c.end_date = '9999-99-99'
GROUP BY substr(c.create_time,1,10),c.goods_id,g.goods_name
),
-- 加入购物车的次数和 件数
cart_5 as (
select
substr(c.create_time,1,10) as dt,
c.goods_id,
g.goods_name,
count(c.id) as cart_count,
sum(c.buy_num) as cart_num
from hive.bj59_yp_dwd_jiale.fact_shop_cart c
JOIN hive.xxxx.dwb_goods_detail g
ON c.goods_id = g.id and c.end_date = '9999-99-99'
group by substr(c.create_time,1,10),c.goods_id,g.goods_name
),
eva_6 as (
select
substr(eva.create_time,1,10) as dt,
eva.goods_id,
g.goods_name,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods > 8,
eva.id,
null
)
) as evaluation_good_count,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods between 6 and 8,
eva.id,
null
)
) as evaluation_mid_count,
count(
if(
eva.geval_scores_goods is not null and eva.geval_scores_goods < 6,
eva.id,
null
)
) as evaluation_bad_count
from hive.bj59_yp_dwd_jiale.fact_goods_evaluation_detail eva
join hive.xxxx.dwb_goods_detail g
on eva.goods_id = g.id and eva.end_date = '9999-99-99'
group by substr(eva.create_time,1,10),eva.goods_id,g.goods_name
),
t7 as (
select
coalesce(t1.dt,t2.dt,t3.dt,t4.dt,t5.dt,t6.dt) as dt,
coalesce(t1.goods_id,t2.goods_id,t3.goods_id,t4.goods_id,t5.goods_id,t6.goods_id) as sku_id,
coalesce(t1.goods_name,t2.goods_name,t3.goods_name,t4.goods_name,t5.goods_name,t6.goods_name) as sku_name,
coalesce(t1.order_count,0) as order_count,
coalesce(t1.order_num,0) as order_num,
coalesce(t1.order_amount,0) as order_amount,
coalesce(t2.payment_count,0) as payment_count,
coalesce(t2.payment_num,0) as payment_num,
coalesce(t2.payment_amount,0) as payment_amount,
coalesce(t3.refund_count,0) as refund_count,
coalesce(t3.refund_num,0) as refund_num,
coalesce(t3.refund_amount,0) as refund_amount,
coalesce(t5.cart_count,0) as cart_count,
coalesce(t5.cart_num,0) as cart_num,
coalesce(t4.favor_count,0) as favor_count,
coalesce(t6.evaluation_good_count,0) as evaluation_good_count,
coalesce(t6.evaluation_mid_count,0) as evaluation_mid_count,
coalesce(t6.evaluation_bad_count,0) as evaluation_bad_count
from order_goods_1 t1
full join payment_2 t2 on t1.dt = t2.dt and t1.goods_id = t2.goods_id
full join refund_3 t3 on t3.dt = t2.dt and t3.goods_id = t2.goods_id
full join favor_4 t4 on t4.dt = t3.dt and t4.goods_id = t3.goods_id
full join cart_5 t5 on t5.dt = t4.dt and t5.goods_id = t4.goods_id
full join eva_6 t6 on t6.dt = t5.dt and t6.goods_id = t5.goods_id
)
select
dt,
sku_id,
sku_name,
sum(order_count) as order_count,
sum(order_num) as order_num,
sum(order_amount) as order_amount,
sum(payment_count) as payment_count,
sum(payment_num) as payment_num,
sum(payment_amount) as payment_amount,
sum(refund_count) as refund_count,
sum(refund_num) as refund_num,
sum(refund_amount) as refund_amount,
sum(cart_count) as cart_count,
sum(cart_num) as cart_num,
sum(favor_count) as favor_count,
sum(evaluation_good_count) as evaluation_good_count,
sum(evaluation_mid_count) as evaluation_mid_count,
sum(evaluation_bad_count) as evaluation_bad_count
from t7
group by dt,sku_id,sku_name;
1.3 用户主题日统计宽表
作业
2 DM层实现操作
DM层的作用: 进行上卷细化统计操作
创建DM层的库(hive中执行)
create database IF NOT EXISTS xxx;
2.1 销售主题统计宽表
可分析的主要指标有:销售收入、平台收入、配送成交额、小程序成交额、安卓APP成交额、苹果APP成交额、PC商城成交额、订单量、参评单量、差评单量、配送单量、退款单量、小程序订单量、安卓APP订单量、苹果APP订单量、PC商城订单量。
维度有:日期(天(已经统计过), 周, 月, 年)、城市、商圈、店铺、品牌、大类、中类、小类建表语句: ```sql CREATE TABLE xxx.dm_sale( date_time string COMMENT ‘统计日期,不能用来分组统计’, time_type string COMMENT ‘统计时间维度:year、month、week、date’, year_code string COMMENT ‘年code’, year_month string COMMENT ‘年月’, month_code string COMMENT ‘月份编码’, day_month_num string COMMENT ‘一月第几天’, dim_date_id string COMMENT ‘日期’, year_week_name_cn string COMMENT ‘年中第几周’,
group_type string COMMENT ‘分组类型:store,trade_area,city,brand,min_class,mid_class,max_class,all’, city_id string COMMENT ‘城市id’, city_name string COMMENT ‘城市name’, trade_area_id string COMMENT ‘商圈id’, trade_area_name string COMMENT ‘商圈名称’, store_id string COMMENT ‘店铺的id’, store_name string COMMENT ‘店铺名称’, brand_id string COMMENT ‘品牌id’, brand_name string COMMENT ‘品牌名称’, max_class_id string COMMENT ‘商品大类id’, max_class_name string COMMENT ‘大类名称’, mid_class_id string COMMENT ‘中类id’, mid_class_name string COMMENT ‘中类名称’, min_class_id string COMMENT ‘小类id’, min_class_name string COMMENT ‘小类名称’, — =======统计======= — 销售收入 sale_amt DECIMAL(38,2) COMMENT ‘销售收入’, — 平台收入 plat_amt DECIMAL(38,2) COMMENT ‘平台收入’, — 配送成交额 deliver_sale_amt DECIMAL(38,2) COMMENT ‘配送成交额’, — 小程序成交额 mini_app_sale_amt DECIMAL(38,2) COMMENT ‘小程序成交额’, — 安卓APP成交额 android_sale_amt DECIMAL(38,2) COMMENT ‘安卓APP成交额’, — 苹果APP成交额 ios_sale_amt DECIMAL(38,2) COMMENT ‘苹果APP成交额’, — PC商城成交额 pcweb_sale_amt DECIMAL(38,2) COMMENT ‘PC商城成交额’, — 成交单量 order_cnt BIGINT COMMENT ‘成交单量’, — 参评单量 eva_order_cnt BIGINT COMMENT ‘参评单量comment=>cmt’, — 差评单量 bad_eva_order_cnt BIGINT COMMENT ‘差评单量negtive-comment=>ncmt’, — 配送成交单量 deliver_order_cnt BIGINT COMMENT ‘配送单量’, — 退款单量 refund_order_cnt BIGINT COMMENT ‘退款单量’, — 小程序成交单量 miniapp_order_cnt BIGINT COMMENT ‘小程序成交单量’, — 安卓APP订单量 android_order_cnt BIGINT COMMENT ‘安卓APP订单量’, — 苹果APP订单量 ios_order_cnt BIGINT COMMENT ‘苹果APP订单量’, — PC商城成交单量 pcweb_order_cnt BIGINT COMMENT ‘PC商城成交单量’ ) COMMENT ‘销售主题宽表’ ROW format delimited fields terminated BY ‘\t’ stored AS orc tblproperties (‘orc.compress’ = ‘SNAPPY’);
<a name="xU011"></a>
#### 统计分析: 先按照天来统计 (前面已经统计过了)
```sql
insert into xxxxx.dm_sale
select
t1.dt as date_time,
'date' as time_type ,
t2.year_code ,
t2.year_month ,
t2.month_code ,
t2.day_month_num ,
t2.dim_date_id ,
t2.year_week_name_cn ,
-- 维度
t1.group_type,
t1.city_id ,
t1.city_name ,
t1.trade_area_id ,
t1.trade_area_name,
t1.store_id,
t1.store_name ,
t1.brand_id,
t1.brand_name,
t1.max_class_id,
t1.max_class_name,
t1.mid_class_id,
t1.mid_class_name,
t1.min_class_id,
t1.min_class_name,
-- 指标
t1.sale_amt,
t1.plat_amt,
t1.deliver_sale_amt ,
t1.mini_app_sale_amt ,
t1.android_sale_amt ,
t1.ios_sale_amt ,
t1.pcweb_sale_amt,
t1.order_cnt,
t1.eva_order_cnt ,
t1.bad_eva_order_cnt ,
t1.deliver_order_cnt ,
t1.refund_order_cnt ,
t1.miniapp_order_cnt ,
t1.android_order_cnt,
t1.ios_order_cnt ,
t1.pcweb_order_cnt
from xxxxx.dws_sale_daycount t1
left join xxxx.dim_date t2 on t1.dt = t2.date_code ;
统计分析:按周来统计
统计分析: 按月来统计
统计分析: 按年来统计
2.2 商品主题统计宽表
主要指标有:下单次数、下单件数、下单金额、被支付次数、被支付件数、被支付金额、被退款次数、被退款件数、被退款金额、被加入购物车次数、被加入购物车件数、被收藏次数、好评数、中评数、差评数。
维度有:商品+日期(总累计值,近30天的数据)。
构建DM层商品主题统计宽表(hive中执行):
create table xxx.dm_sku ( sku_id string comment 'sku_id', sku_name string comment '商品名称', order_last_30d_count bigint comment '最近30日被下单次数', order_last_30d_num bigint comment '最近30日被下单件数', order_last_30d_amount decimal(38,2) comment '最近30日被下单金额', order_count bigint comment '累积被下单次数', order_num bigint comment '累积被下单件数', order_amount decimal(38,2) comment '累积被下单金额', payment_last_30d_count bigint comment '最近30日被支付次数', payment_last_30d_num bigint comment '最近30日被支付件数', payment_last_30d_amount decimal(38,2) comment '最近30日被支付金额', payment_count bigint comment '累积被支付次数', payment_num bigint comment '累积被支付件数', payment_amount decimal(38,2) comment '累积被支付金额', refund_last_30d_count bigint comment '最近三十日退款次数', refund_last_30d_num bigint comment '最近三十日退款件数', refund_last_30d_amount decimal(38,2) comment '最近三十日退款金额', refund_count bigint comment '累积退款次数', refund_num bigint comment '累积退款件数', refund_amount decimal(38,2) comment '累积退款金额', cart_last_30d_count bigint comment '最近30日被加入购物车次数', cart_last_30d_num bigint comment '最近30日被加入购物车件数', cart_count bigint comment '累积被加入购物车次数', cart_num bigint comment '累积被加入购物车件数', favor_last_30d_count bigint comment '最近30日被收藏次数', favor_count bigint comment '累积被收藏次数', evaluation_last_30d_good_count bigint comment '最近30日好评数', evaluation_last_30d_mid_count bigint comment '最近30日中评数', evaluation_last_30d_bad_count bigint comment '最近30日差评数', evaluation_good_count bigint comment '累积好评数', evaluation_mid_count bigint comment '累积中评数', evaluation_bad_count bigint comment '累积差评数' ) COMMENT '商品主题宽表' ROW format delimited fields terminated BY '\t' stored AS orc tblproperties ('orc.compress' = 'SNAPPY');
计算总累计值
- 计算最近30天
- 合并处理
增量实现方案