今日内容:

  • 1- DWS的商品主题的日统计宽表 (操作)
  • 2- DWS的用户主题的日统计宽表(作业)
  • 3- DM层实现操作: (操作)
    • 销售主题统计宽表实现
    • 商品主题统计宽表实现

      1- DWS层实现操作

1.1 销售主题的日统计宽表

可分析的主要指标有:销售收入、平台收入、配送成交额、小程序成交额、安卓APP成交额、苹果APP成交额、PC商城成交额、订单量、参评单量、差评单量、配送单量、退款单量、小程序订单量、安卓APP订单量、苹果APP订单量、PC商城订单量。
维度有:日期、城市、商圈、店铺、品牌、大类、中类、小类。

  1. 维度组合:
  2. 日期:
  3. 日期 + 城市
  4. 日期 + 城市 + 商圈
  5. 日期 + 城市 + 商圈 + 店铺
  6. 日期 + 品牌
  7. 日期 + 大类
  8. 日期 + 大类 + 中类
  9. 日期 + 大类 + 中类 + 小类
  10. 16 * 8 = 128 个需求指标结果

分析, 当前需求统计的这些维度 和 指标, 需要涉及到那些表, 以及涉及到那些字段呢?

维度字段: 
    日期: dwb_order_detail.dt
    城市: dwb_shop_detail: city_id 和 city_name
    商圈: dwb_shop_detail: trade_area_id 和 trade_area_name
    店铺: dwb_shop_detail: id 和 store_name
    品牌: dwb_goods_detail: brand_id 和 brand_name
    大类: dwb_goods_detail: max_class_id 和 max_class_name
    中类: dwb_goods_detail: mid_class_id 和 mid_class_name
    小类: dwb_goods_detail: min_class_id 和 min_class_name

指标字段:  
    订单量相关指标: dwb_order_detail.order_id
    订单销售收入(销售收入, 小程序, 安卓, 苹果, pc端):dwb_order_detail.order_amount
    平台收入: dwb_order_detail.plat_fee
    配送费: wb_order_detail.delivery_fee

 涉及表: 
     订单明细宽表(当前主题的事实表): dwb_order_detail  (事实表)
     店铺明细宽表: dwb_shop_detail  (维度表)
     商品明细宽表: dwb_goods_detail (维度表)

关联条件: 
    订单表 和 店铺表:  
        订单明细宽表.store_id =  店铺明细宽表.id
    订单表 和 商品表: 
        订单明细宽表.goods_id = 商品明细宽表.id

思考: 当前这个是三种数仓模型那一种呢?  星型模型

是否需要过滤一些操作呢? 
    1-  保证必须是支付状态: is_pay =  1 
    2-  保证订单状态:  order_state 不能是 1(已下单, 没有付款) 和 7 (已取消)

首先, 先创建DWS层库 和 表(销售主题日统计宽表) (在HIVE中重新建表)

-- 创建库:
create database if not exists bj59_yp_dws_jiale;

-- 创建表(指标字段 + 维度字段 + 经验字段 ): 
DROP TABLE IF EXISTS bj59_yp_dws_jiale.dws_sale_daycount; 
CREATE TABLE bj59_yp_dws_jiale.dws_sale_daycount( 
  --dt STRING, 
  province_id string COMMENT '省份id', 
  province_name string COMMENT '省份名称', 
  city_id string COMMENT '城市id', 
  city_name string COMMENT '城市name', 
  trade_area_id string COMMENT '商圈id',
  trade_area_name string COMMENT '商圈名称', 
  store_id string COMMENT '店铺的id', 
  store_name string COMMENT '店铺名称', 
  brand_id string COMMENT '品牌id', 
  brand_name string COMMENT '品牌名称',
  max_class_id string COMMENT '商品大类id',
  max_class_name string COMMENT '大类名称',
  mid_class_id string COMMENT '中类id', 
  mid_class_name string COMMENT '中类名称', 
  min_class_id string COMMENT '小类id',
  min_class_name string COMMENT '小类名称', 
  -- 用于标记数据结果是按照那个维度来统计的一个经验字段
  group_type string COMMENT '分组类型:store,trade_area,city,brand,min_class,mid_class,max_class, all',
  -- =======日统计======= 
  -- 销售收入 
  sale_amt DECIMAL(38,2) COMMENT '销售收入',
  -- 平台收入 
  plat_amt DECIMAL(38,2) COMMENT '平台收入', 
  -- 配送成交额 
  deliver_sale_amt DECIMAL(38,2) COMMENT '配送成交额', 
  -- 小程序成交额 
  mini_app_sale_amt DECIMAL(38,2) COMMENT '小程序成交额', 
  -- 安卓APP成交额 
  android_sale_amt DECIMAL(38,2) COMMENT '安卓APP成交额', 
  -- 苹果APP成交额 
  ios_sale_amt DECIMAL(38,2) COMMENT '苹果APP成交额', 
  -- PC商城成交额 
  pcweb_sale_amt DECIMAL(38,2) COMMENT 'PC商城成交额', 
  -- 成交单量 
  order_cnt BIGINT COMMENT '成交单量',
  -- 参评单量 
  eva_order_cnt BIGINT COMMENT '参评单量comment=>cmt', 
  -- 差评单量 
  bad_eva_order_cnt BIGINT COMMENT '差评单量negtive-comment=>ncmt', 
  -- 配送成交单量 
  deliver_order_cnt BIGINT COMMENT '配送单量', 
  -- 退款单量 
  refund_order_cnt BIGINT COMMENT '退款单量', 
  -- 小程序成交单量 
  miniapp_order_cnt BIGINT COMMENT '小程序成交单量',
  -- 安卓APP订单量 
  android_order_cnt BIGINT COMMENT '安卓APP订单量', 
  -- 苹果APP订单量 
  ios_order_cnt BIGINT COMMENT '苹果APP订单量', 
  -- PC商城成交单量 
  pcweb_order_cnt BIGINT COMMENT 'PC商城成交单量' 
)
COMMENT '销售主题日统计宽表' 
PARTITIONED BY(dt STRING) 
ROW format delimited fields terminated BY '\t' 
stored AS orc tblproperties ('orc.compress' = 'SNAPPY');
  • SQL实现操作

image.png

-- 第一步: 对数据进行去重操作, 通过row_number实现
insert into  hive.bj59_yp_dws_jiale.dws_sale_daycount
with t1 as (
    select 
        -- 维度字段:
        o.dt,   -- 日期维度
        s.province_id ,
        s.province_name ,
        s.city_id ,
        s.city_name , -- 城市维度
        s.trade_area_id ,
        s.trade_area_name , -- 商圈维度
        o.store_id ,
        s.store_name , -- 店铺维度
        g.brand_id ,
        g.brand_name, -- 品牌维度
        g.max_class_id ,
        g.max_class_name , -- 大类
        g.mid_class_id ,
        g.mid_class_name ,  -- 中类
        g.min_class_id ,
        g.min_class_name ,  -- 小类

        -- 指标字段
        o.order_id , -- 订单id 计算订单量相关指标
        o.order_amount,  -- 订单销售收入
        o.total_price , -- 商品销售收入
        o.plat_fee , -- 平台分润
        o.delivery_fee , -- 配送费用

        -- 用于判断的字段
        o.order_from ,  -- 渠道(小程序, 安卓, 苹果, pc)
        o.evaluation_id , -- 评价表 id  , 判断是否有参评
        o.delievery_id , -- 配送表id, 判断是否有配送
        o.geval_scores , -- 综合评分: 10分制
        o.refund_id , -- 退款表id,判断是否有退款

        -- 进行去重操作:
        -- 计算 日期, 日期+城市, 日期+城市+商圈 , 日期+城市+商圈+店铺
        row_number() over(partition by o.order_id) as order_rn,
        -- 计算 日期 + 品牌 : 第一个去重计算 订单量  第二个去重 计算 销售额
        row_number() over(partition by o.order_id,g.brand_id) as brand_rn,
        row_number() over(partition by o.order_id,o.goods_id ,g.brand_id) as brand_goods_rn,
        -- 计算 日期 + 大类
        row_number() over(partition by o.order_id,g.max_class_id) as max_class_rn,
        row_number() over(partition by o.order_id,o.goods_id ,g.max_class_id) as max_class_goods_rn,
        -- 计算 日期 + 大类 + 中类
        row_number() over(partition by o.order_id,g.max_class_id,g.mid_class_id) as mid_class_rn,
        row_number() over(partition by o.order_id,o.goods_id ,g.max_class_id,g.mid_class_id) as mid_class_goods_rn,
        -- 计算 日期 + 大类 + 中类 + 小类
        row_number() over(partition by o.order_id,g.max_class_id,g.mid_class_id,g.min_class_id) as min_class_rn,
        row_number() over(partition by o.order_id,o.goods_id ,g.max_class_id,g.mid_class_id,g.min_class_id) as min_class_goods_rn
    from hive.bj59_yp_dwb_jiale.dwb_order_detail  o
        left join  hive.bj59_yp_dwb_jiale.dwb_shop_detail  s on o.store_id  = s.id
        left join  hive.bj59_yp_dwb_jiale.dwb_goods_detail g on  o.goods_id  = g.id
    where o.is_pay = 1 and  o.order_state not in(1,7)
)
select 
    -- 维度字段
    province_id, 
      province_name , 
      city_id  , 
      city_name , 
      trade_area_id  ,
      trade_area_name  , 
      store_id , 
      store_name , 
      brand_id , 
      brand_name ,
      max_class_id ,
      max_class_name,
      mid_class_id , 
     mid_class_name, 
      min_class_id ,
      min_class_name, 

      -- group_type字段的值, 需要根据不同的维度分组, 标上不同的值
      case 
          when grouping(store_id) = 0 then 'store'
          when grouping(trade_area_id) = 0 then 'trade_area'
          when grouping(city_id) = 0 then 'city'
          when grouping(brand_id) = 0 then 'brand'
          when grouping(min_class_id) = 0 then 'min_class'
          when grouping(mid_class_id) = 0 then 'mid_class'
          when grouping(max_class_id) = 0 then 'max_class'
          when grouping(dt) = 0 then 'all'
          else 'other' 
      end  as  group_type,

      -- 销售收入:
      case     
          when grouping(store_id) = 0 then sum( if(order_rn = 1 and store_id is not null,coalesce(order_amount,0),0) )
          when grouping(trade_area_id) = 0 then sum( if(order_rn = 1 and trade_area_id is not null,coalesce(order_amount,0),0) )
          when grouping(city_id) = 0 then sum( if(order_rn = 1 and city_id is not null,coalesce(order_amount,0),0) )
          when grouping(brand_id) = 0 then sum( if( brand_goods_rn = 1 and brand_id is not null,coalesce(total_price,0),0) )
          when grouping(min_class_id) = 0 then sum( if( min_class_goods_rn = 1 and min_class_id is not null,coalesce(total_price,0),0) )
          when grouping(mid_class_id) = 0 then sum( if( mid_class_goods_rn = 1 and mid_class_id is not null,coalesce(total_price,0),0) )
          when grouping(max_class_id) = 0 then sum( if( max_class_goods_rn = 1 and max_class_id is not null,coalesce(total_price,0),0) )
          when grouping(dt) = 0 then sum( if(order_rn = 1 and dt is not null,coalesce(order_amount,0),0) )
          else NULL 
      end  as sale_amt,

      -- 平台收入:
      case     
          when grouping(store_id) = 0 then sum( if(order_rn = 1 and store_id is not null,coalesce(plat_fee,0),0) )
          when grouping(trade_area_id) = 0 then sum( if(order_rn = 1 and trade_area_id is not null,coalesce(plat_fee,0),0) )
          when grouping(city_id) = 0 then sum( if(order_rn = 1 and city_id is not null,coalesce(plat_fee,0),0) )
          when grouping(brand_id) = 0 then  null 
          when grouping(min_class_id) = 0 then null
          when grouping(mid_class_id) = 0 then null
          when grouping(max_class_id) = 0 then null
          when grouping(dt) = 0 then sum( if(order_rn = 1 and dt is not null,coalesce(plat_fee,0),0) )
          else NULL 
      end  as plat_amt,

      -- 配送成交额
      case     
          when grouping(store_id) = 0 then sum( if(order_rn = 1 and store_id is not null,coalesce(delivery_fee,0),0) )
          when grouping(trade_area_id) = 0 then sum( if(order_rn = 1 and trade_area_id is not null,coalesce(delivery_fee,0),0) )
          when grouping(city_id) = 0 then sum( if(order_rn = 1 and city_id is not null,coalesce(delivery_fee,0),0) )
          when grouping(brand_id) = 0 then  null 
          when grouping(min_class_id) = 0 then null
          when grouping(mid_class_id) = 0 then null
          when grouping(max_class_id) = 0 then null
          when grouping(dt) = 0 then sum( if(order_rn = 1 and dt is not null,coalesce(delivery_fee,0),0) )
          else NULL 
      end  as deliver_sale_amt,

      -- 小程序成交额:
      case     
          when grouping(store_id) = 0 
              then sum( 
                          if(
                              order_rn = 1 and store_id is not null and order_from = 'miniapp',
                              coalesce(order_amount,0),
                              0
                          ) 
                      )
          when grouping(trade_area_id) = 0 
              then sum( 
                          if(
                              order_rn = 1 and trade_area_id is not null and order_from = 'miniapp',
                              coalesce(order_amount,0),
                              0
                          ) 
                      )
          when grouping(city_id) = 0 
              then sum( 
                      if(
                          order_rn = 1 and city_id is not null and order_from = 'miniapp',
                          coalesce(order_amount,0),
                          0
                      ) 
                  )
          when grouping(brand_id) = 0 
              then sum( 
                      if( 
                          brand_goods_rn = 1 and brand_id is not null and order_from = 'miniapp',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(min_class_id) = 0 
              then sum( 
                      if( 
                          min_class_goods_rn = 1 and min_class_id is not null and order_from = 'miniapp',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(mid_class_id) = 0 
              then sum( 
                      if( 
                          mid_class_goods_rn = 1 and mid_class_id is not null and order_from = 'miniapp',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(max_class_id) = 0 
              then sum( 
                      if( 
                          max_class_goods_rn = 1 and max_class_id is not null and order_from = 'miniapp',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(dt) = 0 
              then sum( 
                      if(
                          order_rn = 1 and dt is not null and order_from = 'miniapp',
                          coalesce(order_amount,0),
                          0
                      ) 
                  )
          else NULL 
      end  as mini_app_sale_amt,

      -- 安卓成交额
      case     
          when grouping(store_id) = 0 
              then sum( 
                          if(
                              order_rn = 1 and store_id is not null and order_from = 'android',
                              coalesce(order_amount,0),
                              0
                          ) 
                      )
          when grouping(trade_area_id) = 0 
              then sum( 
                          if(
                              order_rn = 1 and trade_area_id is not null and order_from = 'android',
                              coalesce(order_amount,0),
                              0
                          ) 
                      )
          when grouping(city_id) = 0 
              then sum( 
                      if(
                          order_rn = 1 and city_id is not null and order_from = 'android',
                          coalesce(order_amount,0),
                          0
                      ) 
                  )
          when grouping(brand_id) = 0 
              then sum( 
                      if( 
                          brand_goods_rn = 1 and brand_id is not null and order_from = 'android',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(min_class_id) = 0 
              then sum( 
                      if( 
                          min_class_goods_rn = 1 and min_class_id is not null and order_from = 'android',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(mid_class_id) = 0 
              then sum( 
                      if( 
                          mid_class_goods_rn = 1 and mid_class_id is not null and order_from = 'android',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(max_class_id) = 0 
              then sum( 
                      if( 
                          max_class_goods_rn = 1 and max_class_id is not null and order_from = 'android',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(dt) = 0 
              then sum( 
                      if(
                          order_rn = 1 and dt is not null and order_from = 'android',
                          coalesce(order_amount,0),
                          0
                      ) 
                  )
          else NULL 
      end  as android_sale_amt,

      -- 苹果成交额
      case     
          when grouping(store_id) = 0 
              then sum( 
                          if(
                              order_rn = 1 and store_id is not null and order_from = 'ios',
                              coalesce(order_amount,0),
                              0
                          ) 
                      )
          when grouping(trade_area_id) = 0 
              then sum( 
                          if(
                              order_rn = 1 and trade_area_id is not null and order_from = 'ios',
                              coalesce(order_amount,0),
                              0
                          ) 
                      )
          when grouping(city_id) = 0 
              then sum( 
                      if(
                          order_rn = 1 and city_id is not null and order_from = 'ios',
                          coalesce(order_amount,0),
                          0
                      ) 
                  )
          when grouping(brand_id) = 0 
              then sum( 
                      if( 
                          brand_goods_rn = 1 and brand_id is not null and order_from = 'ios',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(min_class_id) = 0 
              then sum( 
                      if( 
                          min_class_goods_rn = 1 and min_class_id is not null and order_from = 'ios',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(mid_class_id) = 0 
              then sum( 
                      if( 
                          mid_class_goods_rn = 1 and mid_class_id is not null and order_from = 'ios',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(max_class_id) = 0 
              then sum( 
                      if( 
                          max_class_goods_rn = 1 and max_class_id is not null and order_from = 'ios',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(dt) = 0 
              then sum( 
                      if(
                          order_rn = 1 and dt is not null and order_from = 'ios',
                          coalesce(order_amount,0),
                          0
                      ) 
                  )
          else NULL 
      end  as ios_sale_amt,

      -- PC成交额
      case     
          when grouping(store_id) = 0 
              then sum( 
                          if(
                              order_rn = 1 and store_id is not null and order_from = 'pcweb',
                              coalesce(order_amount,0),
                              0
                          ) 
                      )
          when grouping(trade_area_id) = 0 
              then sum( 
                          if(
                              order_rn = 1 and trade_area_id is not null and order_from = 'pcweb',
                              coalesce(order_amount,0),
                              0
                          ) 
                      )
          when grouping(city_id) = 0 
              then sum( 
                      if(
                          order_rn = 1 and city_id is not null and order_from = 'pcweb',
                          coalesce(order_amount,0),
                          0
                      ) 
                  )
          when grouping(brand_id) = 0 
              then sum( 
                      if( 
                          brand_goods_rn = 1 and brand_id is not null and order_from = 'pcweb',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(min_class_id) = 0 
              then sum( 
                      if( 
                          min_class_goods_rn = 1 and min_class_id is not null and order_from = 'pcweb',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(mid_class_id) = 0 
              then sum( 
                      if( 
                          mid_class_goods_rn = 1 and mid_class_id is not null and order_from = 'pcweb',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(max_class_id) = 0 
              then sum( 
                      if( 
                          max_class_goods_rn = 1 and max_class_id is not null and order_from = 'pcweb',
                          coalesce(total_price,0),
                          0
                      ) 
                  )
          when grouping(dt) = 0 
              then sum( 
                      if(
                          order_rn = 1 and dt is not null and order_from = 'pcweb',
                          coalesce(order_amount,0),
                          0
                      ) 
                  )
          else NULL 
      end  as pcweb_sale_amt,

      --- 订单量相关指标:
      -- 成交单量:
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null,
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  order_cnt,

      -- 参评单量:
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null and evaluation_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null  and evaluation_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null  and evaluation_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null  and evaluation_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null  and evaluation_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null   and evaluation_id is not null ,
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null   and evaluation_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null   and evaluation_id is not null,
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  eva_order_cnt,

      -- 差评单量:
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null and evaluation_id is not null and geval_scores < 6,
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null  and evaluation_id is not null  and geval_scores < 6,
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null  and evaluation_id is not null  and geval_scores < 6,
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null  and evaluation_id is not null  and geval_scores < 6,
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null  and evaluation_id is not null  and geval_scores < 6,
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null   and evaluation_id is not null   and geval_scores < 6,
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null   and evaluation_id is not null  and geval_scores < 6,
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null   and evaluation_id is not null and geval_scores < 6,
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  bad_eva_order_cnt,

      -- 配送成交单量
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null and delievery_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null and delievery_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null and delievery_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null and delievery_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null and delievery_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null  and delievery_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null  and delievery_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null   and delievery_id is not null,
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  deliver_order_cnt,

      -- 退款成交单量
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null and refund_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null and refund_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null and refund_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null and refund_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null and refund_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null  and refund_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null  and refund_id is not null,
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null   and refund_id is not null,
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  refund_order_cnt,

      -- 小程序成交量
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null and order_from = 'miniapp',
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null and order_from = 'miniapp',
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null and order_from = 'miniapp',
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null and order_from = 'miniapp',
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null and order_from = 'miniapp',
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null  and order_from = 'miniapp',
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null  and order_from = 'miniapp',
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null   and order_from = 'miniapp',
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  miniapp_order_cnt,

      -- 安卓成交量
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null and order_from = 'android',
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null and order_from = 'android',
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null and order_from = 'android',
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null and order_from = 'android',
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null and order_from = 'android',
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null  and order_from = 'android',
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null  and order_from = 'android',
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null   and order_from = 'android',
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  android_order_cnt,

      -- 苹果成交量
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null and order_from = 'ios',
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null and order_from = 'ios',
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null and order_from = 'ios',
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null and order_from = 'ios',
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null and order_from = 'ios',
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null  and order_from = 'ios',
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null  and order_from = 'ios',
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null   and order_from = 'ios',
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  ios_order_cnt,

      -- pc成交量
      case 
          when grouping(store_id) = 0 
              then count(
                      if(
                          order_rn = 1 and store_id is not null and order_from = 'pcweb',
                          order_id,
                          NULL
                      )
                  )
          when grouping(trade_area_id) = 0 
              then count(
                      if(
                          order_rn = 1 and trade_area_id is not null and order_from = 'pcweb',
                          order_id,
                          NULL
                      )
                  )
          when grouping(city_id) = 0 
              then count(
                      if(
                          order_rn = 1 and city_id is not null and order_from = 'pcweb',
                          order_id,
                          NULL
                      )
                  )
          when grouping(brand_id) = 0 
              then count(
                      if(
                          brand_rn = 1 and brand_id is not null and order_from = 'pcweb',
                          order_id,
                          NULL
                      )
                  )
          when grouping(min_class_id) = 0 
              then count(
                      if(
                          min_class_rn = 1 and min_class_id is not null and order_from = 'pcweb',
                          order_id,
                          NULL
                      )
                  )
          when grouping(mid_class_id) = 0 
              then count(
                      if(
                          mid_class_rn = 1 and mid_class_id is not null  and order_from = 'pcweb',
                          order_id,
                          NULL
                      )
                  )
          when grouping(max_class_id) = 0 
              then count(
                      if(
                          max_class_rn = 1 and max_class_id is not null  and order_from = 'pcweb',
                          order_id,
                          NULL
                      )
                  )
          when grouping(dt) = 0 
              then count(
                      if(
                          order_rn = 1 and dt is not null   and order_from = 'pcweb',
                          order_id,
                          NULL
                      )
                  )
          else NULL
      end  as  pcweb_order_cnt,

      dt

from  t1 
group by  grouping sets(
    dt,
    (dt,province_id,province_name,city_id,city_name),
    (dt,province_id,province_name,city_id,city_name,trade_area_id,trade_area_name),
    (dt,province_id,province_name,city_id,city_name,trade_area_id,trade_area_name,store_id,store_name),
    (dt,brand_id,brand_name),
    (dt,max_class_id,max_class_name),
    (dt,max_class_id,max_class_name,mid_class_id,mid_class_name),
    (dt,max_class_id,max_class_name,mid_class_id,mid_class_name,min_class_id,min_class_name)
)

1.2 商品主题统计宽表

    主要指标有:下单次数、下单件数、下单金额、被支付次数、被支付件数、被支付金额、被退款次数、被退款件数、被退款金额、被加入购物车次数、被加入购物车件数、被收藏次数、好评数、中评数、差评数。<br />        维度有:商品、日期。
  • 创建商品主题统计宽表(HIVE执行)

    create table xxx.dws_sku_daycount ( 
    dt STRING,
    sku_id string comment 'sku_id',
    sku_name string comment '商品名称',
    order_count bigint comment '被下单次数',
    order_num bigint comment '被下单件数',
    order_amount decimal(38,2) comment '被下单金额',
    payment_count bigint  comment '被支付次数',
    payment_num bigint comment '被支付件数',
    payment_amount decimal(38,2) comment '被支付金额',
    refund_count bigint  comment '被退款次数',
    refund_num bigint comment '被退款件数',
    refund_amount  decimal(38,2) comment '被退款金额',
    cart_count bigint comment '被加入购物车次数',
    cart_num bigint comment '被加入购物车件数',
    favor_count bigint comment '被收藏次数',
    evaluation_good_count bigint comment '好评数',
    evaluation_mid_count bigint comment '中评数',
    evaluation_bad_count bigint comment '差评数'
    ) COMMENT '每日商品行为'
    --PARTITIONED BY(dt STRING)
    ROW format delimited fields terminated BY '\t'
    stored AS orc tblproperties ('orc.compress' = 'SNAPPY');
    
  • 需求分析: ```properties 主要指标有: 下单次数、 下单件数、 下单金额、 被支付次数、 被支付件数、 被支付金额、 被退款次数、 被退款件数、 被退款金额、 被加入购物车次数、 被加入购物车件数、 被收藏次数、 好评数、 中评数、 差评数。

维度有: 商品 + 日期。

下单次数、 下单件数、 下单金额: 表: 订单明细宽表 dwb_order_detail 涉及字段: 维度字段: 商品维度: goods_id, goods_name 日期: dt 指标字段: 次数: 产生了多少个订单 order_id 件数: buy_num 金额: total_price

被支付次数、被支付件数、被支付金额: 表: 订单明细宽表 dwb_order_detail 过滤出已经支付的订单数据: is_pay = 1 and order_state not in(1,7) 涉及字段: 维度字段: 商品维度: goods_id, goods_name 日期: dt 指标字段: 次数: 产生了多少个订单 order_id 件数: buy_num 金额: total_price

被退款次数、被退款件数、被退款金额: 表: 订单明细宽表 dwb_order_detail 过滤出已经退款的订单数据: refund_id is not null and refund_state = 5 (退款完成) 涉及字段: 维度字段: 商品维度: goods_id, goods_name 日期: dt 指标字段: 次数: 产生了多少个订单 order_id 件数: buy_num 金额: total_price

被加入购物车次数、被加入购物车件数: 表: dwd.fact_shop_cart(购物车表) 在此表放置的数据, 都是购物车的相关数据 和 dwb_goods_detail(商品明细宽表) 关联条件: c.goods_id = g.goods_id and c.end_date = ‘9999-99-99’ 涉及字段: 维度字段: 商品维度: c.goods_id 和 g.goods_name 时间维度: c.create_time (此处不要使用 start_date, 因为这个起始时间, 有可能是采集数据时间, 并不是数据创建时间) 指标字段: 次数: c.id 件数: c.buy_num

被收藏次数: 表: dwd.fact_goods_collect(收藏表) 和 dwb_goods_detail(商品明细宽表) 关联条件: c.goods_id = g.goods_id and c.end_date = ‘9999-99-99’ 涉及字段: 维度字段: 商品维度: c.goods_id , g.goods_name 时间维度: c.create_time

    维度字段: 
        次数: c.id

好评数、中评数、差评数: 表: dwd.fact_goods_evaluation_detail(商品评价表) 和 dwb_goods_detail(商品明细宽表) 关联条件: eva.goods_id = g.goods_id and eva.end_date = ‘9999-99-99’ 判断好评 中评 差评呢?
eva.geval_scores_goods 10分制
好评: 大于 8分, 中评 6~8 差评 6以下 涉及字段: 涉及维度: 商品维度: eva.goods_id 和 g.goods_name 时间维度: eva.create_time 指标字段:
数量: eva.id


- SQL实现:
   - 目前: 将每一个指定的统计结果, 都放置在一个临表:  
```sql
-- 对订单表进行指标计算的时候, 需要先去重(由于我们是基于商品统计, 需要保证每笔订单中有不同的商品, 相同商品去除掉)
with t1 as (
    select 
        -- 维度字段:
        dt, -- 订单生成时间
        pay_time, --支付时间
        apply_date, -- 退款时间
        goods_id,
        goods_name,
        -- 指标字段
        order_id, -- 计算次数
        buy_num , -- 商品数量, 计算件数
        total_price , -- 商品金额, 计算金额

        -- 判断字段:
        is_pay , -- 是否支付, 为1表示支付
        order_state , -- 订单状态, 不能为 1和7
        refund_id , -- 退款id  不能为null
        refund_state , -- 退款状态  必须为 5

        -- 去重操作
        row_number() over(partition by order_id,goods_id) as  rn1
    from hive.bj59_yp_dwb_jiale.dwb_order_detail 
),
-- 下单次数、 下单件数、 下单金额:
order_goods_1 as (
    select
        dt,
        goods_id,
        goods_name,
        count(order_id) as order_count,
        sum(buy_num) as order_num,
        sum(total_price) as order_amount
    from  t1  where  rn1 =1
    group by goods_id,goods_name,dt
),
-- 被支付次数、被支付件数、被支付金额:
payment_2 AS (
    select 
        substr(pay_time,1,10) as dt,
        goods_id,
        goods_name,
        count(order_id) as payment_count,
        sum(buy_num) as payment_num,
        sum(total_price) as payment_amount
    from t1 where rn1 =1 and is_pay = 1 and  order_state not in(1,7)
    group by substr(pay_time,1,10),goods_id,goods_name
),
-- 被退款的次数, 被退款的件数, 被退款的金额
refund_3 AS (
    SELECT 
        substr(apply_date,1,10) AS dt , 
        goods_id,
        goods_name,

        count(order_id) as refund_count,
        sum(buy_num) as refund_num,
        sum(total_price) as refund_amount
    FROM t1  WHERE  rn1 = 1 AND refund_id NOT NULL AND refund_state = 5
    GROUP BY substr(apply_date,1,10), goods_id,goods_name
),
-- 被收藏次数
favor_4 as (
    SELECT
        substr(c.create_time,1,10) as dt,
        c.goods_id,
        g.goods_name,
        count(c.id) AS  favor_count
    FROM  hive.xxxx.fact_goods_collect c 
        JOIN hive.bj59_yp_dwb_jiale.dwb_goods_detail g 
            ON  c.goods_id = g.goods_id and c.end_date = '9999-99-99'
    GROUP BY  substr(c.create_time,1,10),c.goods_id,g.goods_name
)
-- 加入购物车的次数和 件数
cart_5 as (
    select
        substr(c.create_time,1,10) as dt,
        c.goods_id,
        g.goods_name,

        count(c.id) as cart_count,
        sum(c.buy_num) as cart_num
    from hive.xxxx.fact_shop_cart c 
        JOIN hive.bj59_yp_dwb_jiale.dwb_goods_detail g 
                ON  c.goods_id = g.goods_id and c.end_date = '9999-99-99'
    group by substr(c.create_time,1,10),c.goods_id,g.goods_name
)
eva_6 as (
    select 
        substr(eva.create_time,1,10) as  dt,
        eva.goods_id,
        g.goods_name,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods > 8,
                eva.id,
                null
            )
        ) as evaluation_good_count,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods between 6 and 8,
                eva.id,
                null
            )
        ) as evaluation_mid_count,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods < 6,
                eva.id,
                null
            )
        ) as evaluation_bad_count
    from hive.xxxx.fact_goods_evaluation_detail eva
        join hive.bj59_yp_dwb_jiale.dwb_goods_detail g 
                on  eva.goods_id = g.goods_id and eva.end_date = '9999-99-99'
    group by substr(eva.create_time,1,10),eva.goods_id,g.goods_name
)
  • 第二步: 将结果灌入到目标表: 需要将多个临表全部合并在一起, 灌入到目标表

image.png

-- 对订单表进行指标计算的时候, 需要先去重(由于我们是基于商品统计, 需要保证每笔订单中有不同的商品, 相同商品去除掉)
insert into hive.bj59_yp_dws_jiale.dws_sku_daycount 
with t1 as (
    select 
        -- 维度字段:
        dt, -- 订单生成时间
        pay_time, --支付时间
        apply_date, -- 退款时间
        goods_id,
        goods_name,
        -- 指标字段
        order_id, -- 计算次数
        buy_num , -- 商品数量, 计算件数
        total_price , -- 商品金额, 计算金额

        -- 判断字段:
        is_pay , -- 是否支付, 为1表示支付
        order_state , -- 订单状态, 不能为 1和7
        refund_id , -- 退款id  不能为null
        refund_state , -- 退款状态  必须为 5

        -- 去重操作
        row_number() over(partition by order_id,goods_id) as  rn1
    from hive.xxxx.dwb_order_detail 
),
-- 下单次数、 下单件数、 下单金额:
order_goods_1 as (
    select
        dt,
        goods_id,
        goods_name,
        count(order_id) as order_count,
        sum(buy_num) as order_num,
        sum(total_price) as order_amount
    from  t1  where  rn1 =1
    group by goods_id,goods_name,dt
),
-- 被支付次数、被支付件数、被支付金额:
payment_2 AS (
    select 
        substr(pay_time,1,10) as dt,
        goods_id,
        goods_name,
        count(order_id) as payment_count,
        sum(buy_num) as payment_num,
        sum(total_price) as payment_amount
    from t1 where rn1 =1 and is_pay = 1 and  order_state not in(1,7)
    group by substr(pay_time,1,10),goods_id,goods_name
),
-- 被退款的次数, 被退款的件数, 被退款的金额
refund_3 AS (
    SELECT 
        substr(apply_date,1,10) AS dt , 
        goods_id,
        goods_name,

        count(order_id) as refund_count,
        sum(buy_num) as refund_num,
        sum(total_price) as refund_amount
    FROM t1  WHERE  rn1 = 1 AND refund_id is NOT NULL AND refund_state = 5
    GROUP BY substr(apply_date,1,10), goods_id,goods_name
),
-- 被收藏次数
favor_4 as (
    SELECT
        substr(c.create_time,1,10) as dt,
        c.goods_id,
        g.goods_name,
        count(c.id) AS  favor_count
    FROM  hive.bj59_yp_dwd_jiale.fact_goods_collect c 
        JOIN hive.xxxx.dwb_goods_detail g 
            ON  c.goods_id = g.id and c.end_date = '9999-99-99'
    GROUP BY  substr(c.create_time,1,10),c.goods_id,g.goods_name
),
-- 加入购物车的次数和 件数
cart_5 as (
    select
        substr(c.create_time,1,10) as dt,
        c.goods_id,
        g.goods_name,

        count(c.id) as cart_count,
        sum(c.buy_num) as cart_num
    from hive.bj59_yp_dwd_jiale.fact_shop_cart c 
        JOIN hive.xxxx.dwb_goods_detail g 
                ON  c.goods_id = g.id and c.end_date = '9999-99-99'
    group by substr(c.create_time,1,10),c.goods_id,g.goods_name
),
eva_6 as (
    select 
        substr(eva.create_time,1,10) as  dt,
        eva.goods_id,
        g.goods_name,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods > 8,
                eva.id,
                null
            )
        ) as evaluation_good_count,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods between 6 and 8,
                eva.id,
                null
            )
        ) as evaluation_mid_count,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods < 6,
                eva.id,
                null
            )
        ) as evaluation_bad_count
    from hive.bj59_yp_dwd_jiale.fact_goods_evaluation_detail eva
        join hive.xxxx.dwb_goods_detail g 
                on  eva.goods_id = g.id and eva.end_date = '9999-99-99'
    group by substr(eva.create_time,1,10),eva.goods_id,g.goods_name
),
t7 as (
    select 
        coalesce(t1.dt,t2.dt,t3.dt,t4.dt,t5.dt,t6.dt) as dt,
        coalesce(t1.goods_id,t2.goods_id,t3.goods_id,t4.goods_id,t5.goods_id,t6.goods_id) as sku_id,
        coalesce(t1.goods_name,t2.goods_name,t3.goods_name,t4.goods_name,t5.goods_name,t6.goods_name) as sku_name,
        coalesce(t1.order_count,0) as order_count,
        coalesce(t1.order_num,0) as order_num,
        coalesce(t1.order_amount,0) as order_amount,
        coalesce(t2.payment_count,0) as payment_count,
        coalesce(t2.payment_num,0) as payment_num,
        coalesce(t2.payment_amount,0) as payment_amount,
        coalesce(t3.refund_count,0) as refund_count,
        coalesce(t3.refund_num,0) as refund_num,
        coalesce(t3.refund_amount,0) as refund_amount,
        coalesce(t5.cart_count,0) as cart_count,
        coalesce(t5.cart_num,0) as cart_num,
        coalesce(t4.favor_count,0) as favor_count,
        coalesce(t6.evaluation_good_count,0) as evaluation_good_count,
        coalesce(t6.evaluation_mid_count,0) as evaluation_mid_count,
        coalesce(t6.evaluation_bad_count,0) as evaluation_bad_count

    from order_goods_1  t1 
        full join payment_2 t2 on  t1.dt = t2.dt and  t1.goods_id = t2.goods_id
        full join refund_3 t3 on t3.dt = t2.dt and t3.goods_id = t2.goods_id
        full join favor_4  t4 on t4.dt = t3.dt and t4.goods_id = t3.goods_id
        full join cart_5 t5 on t5.dt = t4.dt and t5.goods_id = t4.goods_id
        full join eva_6 t6 on t6.dt = t5.dt and t6.goods_id = t5.goods_id
)
select 
    dt,
    sku_id,
    sku_name,
    sum(order_count)  as order_count,
    sum(order_num)     as order_num,
    sum(order_amount)  as order_amount,
    sum(payment_count) as payment_count,
    sum(payment_num)   as payment_num,
    sum(payment_amount) as payment_amount,
    sum(refund_count)  as refund_count,
    sum(refund_num)    as refund_num,
    sum(refund_amount) as refund_amount,
    sum(cart_count)    as cart_count,
    sum(cart_num)      as cart_num,
    sum(favor_count)   as favor_count,
    sum(evaluation_good_count) as evaluation_good_count,
    sum(evaluation_mid_count)  as evaluation_mid_count,
    sum(evaluation_bad_count) as evaluation_bad_count
from  t7
group by  dt,sku_id,sku_name;-- 对订单表进行指标计算的时候, 需要先去重(由于我们是基于商品统计, 需要保证每笔订单中有不同的商品, 相同商品去除掉)
insert into hive.bj59_yp_dws_jiale.dws_sku_daycount 
with t1 as (
    select 
        -- 维度字段:
        dt, -- 订单生成时间
        pay_time, --支付时间
        apply_date, -- 退款时间
        goods_id,
        goods_name,
        -- 指标字段
        order_id, -- 计算次数
        buy_num , -- 商品数量, 计算件数
        total_price , -- 商品金额, 计算金额

        -- 判断字段:
        is_pay , -- 是否支付, 为1表示支付
        order_state , -- 订单状态, 不能为 1和7
        refund_id , -- 退款id  不能为null
        refund_state , -- 退款状态  必须为 5

        -- 去重操作
        row_number() over(partition by order_id,goods_id) as  rn1
    from hive.xxxx.dwb_order_detail 
),
-- 下单次数、 下单件数、 下单金额:
order_goods_1 as (
    select
        dt,
        goods_id,
        goods_name,
        count(order_id) as order_count,
        sum(buy_num) as order_num,
        sum(total_price) as order_amount
    from  t1  where  rn1 =1
    group by goods_id,goods_name,dt
),
-- 被支付次数、被支付件数、被支付金额:
payment_2 AS (
    select 
        substr(pay_time,1,10) as dt,
        goods_id,
        goods_name,
        count(order_id) as payment_count,
        sum(buy_num) as payment_num,
        sum(total_price) as payment_amount
    from t1 where rn1 =1 and is_pay = 1 and  order_state not in(1,7)
    group by substr(pay_time,1,10),goods_id,goods_name
),
-- 被退款的次数, 被退款的件数, 被退款的金额
refund_3 AS (
    SELECT 
        substr(apply_date,1,10) AS dt , 
        goods_id,
        goods_name,

        count(order_id) as refund_count,
        sum(buy_num) as refund_num,
        sum(total_price) as refund_amount
    FROM t1  WHERE  rn1 = 1 AND refund_id is NOT NULL AND refund_state = 5
    GROUP BY substr(apply_date,1,10), goods_id,goods_name
),
-- 被收藏次数
favor_4 as (
    SELECT
        substr(c.create_time,1,10) as dt,
        c.goods_id,
        g.goods_name,
        count(c.id) AS  favor_count
    FROM  hive.bj59_yp_dwd_jiale.fact_goods_collect c 
        JOIN hive.xxxx.dwb_goods_detail g 
            ON  c.goods_id = g.id and c.end_date = '9999-99-99'
    GROUP BY  substr(c.create_time,1,10),c.goods_id,g.goods_name
),
-- 加入购物车的次数和 件数
cart_5 as (
    select
        substr(c.create_time,1,10) as dt,
        c.goods_id,
        g.goods_name,

        count(c.id) as cart_count,
        sum(c.buy_num) as cart_num
    from hive.bj59_yp_dwd_jiale.fact_shop_cart c 
        JOIN hive.xxxx.dwb_goods_detail g 
                ON  c.goods_id = g.id and c.end_date = '9999-99-99'
    group by substr(c.create_time,1,10),c.goods_id,g.goods_name
),
eva_6 as (
    select 
        substr(eva.create_time,1,10) as  dt,
        eva.goods_id,
        g.goods_name,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods > 8,
                eva.id,
                null
            )
        ) as evaluation_good_count,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods between 6 and 8,
                eva.id,
                null
            )
        ) as evaluation_mid_count,

        count(
            if(
                eva.geval_scores_goods is not null and  eva.geval_scores_goods < 6,
                eva.id,
                null
            )
        ) as evaluation_bad_count
    from hive.bj59_yp_dwd_jiale.fact_goods_evaluation_detail eva
        join hive.xxxx.dwb_goods_detail g 
                on  eva.goods_id = g.id and eva.end_date = '9999-99-99'
    group by substr(eva.create_time,1,10),eva.goods_id,g.goods_name
),
t7 as (
    select 
        coalesce(t1.dt,t2.dt,t3.dt,t4.dt,t5.dt,t6.dt) as dt,
        coalesce(t1.goods_id,t2.goods_id,t3.goods_id,t4.goods_id,t5.goods_id,t6.goods_id) as sku_id,
        coalesce(t1.goods_name,t2.goods_name,t3.goods_name,t4.goods_name,t5.goods_name,t6.goods_name) as sku_name,
        coalesce(t1.order_count,0) as order_count,
        coalesce(t1.order_num,0) as order_num,
        coalesce(t1.order_amount,0) as order_amount,
        coalesce(t2.payment_count,0) as payment_count,
        coalesce(t2.payment_num,0) as payment_num,
        coalesce(t2.payment_amount,0) as payment_amount,
        coalesce(t3.refund_count,0) as refund_count,
        coalesce(t3.refund_num,0) as refund_num,
        coalesce(t3.refund_amount,0) as refund_amount,
        coalesce(t5.cart_count,0) as cart_count,
        coalesce(t5.cart_num,0) as cart_num,
        coalesce(t4.favor_count,0) as favor_count,
        coalesce(t6.evaluation_good_count,0) as evaluation_good_count,
        coalesce(t6.evaluation_mid_count,0) as evaluation_mid_count,
        coalesce(t6.evaluation_bad_count,0) as evaluation_bad_count

    from order_goods_1  t1 
        full join payment_2 t2 on  t1.dt = t2.dt and  t1.goods_id = t2.goods_id
        full join refund_3 t3 on t3.dt = t2.dt and t3.goods_id = t2.goods_id
        full join favor_4  t4 on t4.dt = t3.dt and t4.goods_id = t3.goods_id
        full join cart_5 t5 on t5.dt = t4.dt and t5.goods_id = t4.goods_id
        full join eva_6 t6 on t6.dt = t5.dt and t6.goods_id = t5.goods_id
)
select 
    dt,
    sku_id,
    sku_name,
    sum(order_count)  as order_count,
    sum(order_num)     as order_num,
    sum(order_amount)  as order_amount,
    sum(payment_count) as payment_count,
    sum(payment_num)   as payment_num,
    sum(payment_amount) as payment_amount,
    sum(refund_count)  as refund_count,
    sum(refund_num)    as refund_num,
    sum(refund_amount) as refund_amount,
    sum(cart_count)    as cart_count,
    sum(cart_num)      as cart_num,
    sum(favor_count)   as favor_count,
    sum(evaluation_good_count) as evaluation_good_count,
    sum(evaluation_mid_count)  as evaluation_mid_count,
    sum(evaluation_bad_count) as evaluation_bad_count
from  t7
group by  dt,sku_id,sku_name;

1.3 用户主题日统计宽表

   作业

2 DM层实现操作

DM层的作用: 进行上卷细化统计操作

  • 创建DM层的库(hive中执行)

    create database IF NOT EXISTS xxx;
    

    2.1 销售主题统计宽表

    可分析的主要指标有:销售收入、平台收入、配送成交额、小程序成交额、安卓APP成交额、苹果APP成交额、PC商城成交额、订单量、参评单量、差评单量、配送单量、退款单量、小程序订单量、安卓APP订单量、苹果APP订单量、PC商城订单量。
    维度有:日期(天(已经统计过), 周, 月, 年)、城市、商圈、店铺、品牌、大类、中类、小类

  • 建表语句: ```sql CREATE TABLE xxx.dm_sale( date_time string COMMENT ‘统计日期,不能用来分组统计’, time_type string COMMENT ‘统计时间维度:year、month、week、date’, year_code string COMMENT ‘年code’, year_month string COMMENT ‘年月’, month_code string COMMENT ‘月份编码’, day_month_num string COMMENT ‘一月第几天’, dim_date_id string COMMENT ‘日期’, year_week_name_cn string COMMENT ‘年中第几周’,

    group_type string COMMENT ‘分组类型:store,trade_area,city,brand,min_class,mid_class,max_class,all’, city_id string COMMENT ‘城市id’, city_name string COMMENT ‘城市name’, trade_area_id string COMMENT ‘商圈id’, trade_area_name string COMMENT ‘商圈名称’, store_id string COMMENT ‘店铺的id’, store_name string COMMENT ‘店铺名称’, brand_id string COMMENT ‘品牌id’, brand_name string COMMENT ‘品牌名称’, max_class_id string COMMENT ‘商品大类id’, max_class_name string COMMENT ‘大类名称’, mid_class_id string COMMENT ‘中类id’, mid_class_name string COMMENT ‘中类名称’, min_class_id string COMMENT ‘小类id’, min_class_name string COMMENT ‘小类名称’, — =======统计======= — 销售收入 sale_amt DECIMAL(38,2) COMMENT ‘销售收入’, — 平台收入 plat_amt DECIMAL(38,2) COMMENT ‘平台收入’, — 配送成交额 deliver_sale_amt DECIMAL(38,2) COMMENT ‘配送成交额’, — 小程序成交额 mini_app_sale_amt DECIMAL(38,2) COMMENT ‘小程序成交额’, — 安卓APP成交额 android_sale_amt DECIMAL(38,2) COMMENT ‘安卓APP成交额’, — 苹果APP成交额 ios_sale_amt DECIMAL(38,2) COMMENT ‘苹果APP成交额’, — PC商城成交额 pcweb_sale_amt DECIMAL(38,2) COMMENT ‘PC商城成交额’, — 成交单量 order_cnt BIGINT COMMENT ‘成交单量’, — 参评单量 eva_order_cnt BIGINT COMMENT ‘参评单量comment=>cmt’, — 差评单量 bad_eva_order_cnt BIGINT COMMENT ‘差评单量negtive-comment=>ncmt’, — 配送成交单量 deliver_order_cnt BIGINT COMMENT ‘配送单量’, — 退款单量 refund_order_cnt BIGINT COMMENT ‘退款单量’, — 小程序成交单量 miniapp_order_cnt BIGINT COMMENT ‘小程序成交单量’, — 安卓APP订单量 android_order_cnt BIGINT COMMENT ‘安卓APP订单量’, — 苹果APP订单量 ios_order_cnt BIGINT COMMENT ‘苹果APP订单量’, — PC商城成交单量 pcweb_order_cnt BIGINT COMMENT ‘PC商城成交单量’ ) COMMENT ‘销售主题宽表’ ROW format delimited fields terminated BY ‘\t’ stored AS orc tblproperties (‘orc.compress’ = ‘SNAPPY’);

<a name="xU011"></a>
#### 统计分析: 先按照天来统计 (前面已经统计过了)
```sql
insert into xxxxx.dm_sale 
select 
    t1.dt as date_time,
    'date' as time_type ,
    t2.year_code ,
    t2.year_month ,
    t2.month_code ,
    t2.day_month_num ,
    t2.dim_date_id ,
    t2.year_week_name_cn ,
    -- 维度
      t1.group_type,
      t1.city_id ,
      t1.city_name ,
      t1.trade_area_id ,
      t1.trade_area_name,
      t1.store_id,
      t1.store_name ,
      t1.brand_id,
      t1.brand_name,
      t1.max_class_id,
      t1.max_class_name,
      t1.mid_class_id,
      t1.mid_class_name,
      t1.min_class_id,
    t1.min_class_name,
    -- 指标
    t1.sale_amt,
    t1.plat_amt,
    t1.deliver_sale_amt ,
    t1.mini_app_sale_amt ,
    t1.android_sale_amt ,
    t1.ios_sale_amt ,
    t1.pcweb_sale_amt,
    t1.order_cnt,
    t1.eva_order_cnt ,
    t1.bad_eva_order_cnt ,
    t1.deliver_order_cnt ,
    t1.refund_order_cnt ,
    t1.miniapp_order_cnt ,
    t1.android_order_cnt,
    t1.ios_order_cnt ,
    t1.pcweb_order_cnt
from xxxxx.dws_sale_daycount  t1 
    left join  xxxx.dim_date  t2 on t1.dt = t2.date_code ;

统计分析:按周来统计

上卷统计的分析流程:
image.png

统计分析: 按月来统计

统计分析: 按年来统计

2.2 商品主题统计宽表

主要指标有:下单次数、下单件数、下单金额、被支付次数、被支付件数、被支付金额、被退款次数、被退款件数、被退款金额、被加入购物车次数、被加入购物车件数、被收藏次数、好评数、中评数、差评数。
维度有:商品+日期(总累计值,近30天的数据)。

  • 构建DM层商品主题统计宽表(hive中执行):

    create table xxx.dm_sku
    (
    
      sku_id string comment 'sku_id',
      sku_name string comment '商品名称',
      order_last_30d_count bigint comment '最近30日被下单次数',
      order_last_30d_num bigint comment '最近30日被下单件数',
      order_last_30d_amount decimal(38,2)  comment '最近30日被下单金额',
      order_count bigint comment '累积被下单次数',
      order_num bigint comment '累积被下单件数',
      order_amount decimal(38,2) comment '累积被下单金额',
      payment_last_30d_count   bigint  comment '最近30日被支付次数',
      payment_last_30d_num bigint comment '最近30日被支付件数',
      payment_last_30d_amount  decimal(38,2) comment '最近30日被支付金额',
      payment_count   bigint  comment '累积被支付次数',
      payment_num bigint comment '累积被支付件数',
      payment_amount  decimal(38,2) comment '累积被支付金额',
      refund_last_30d_count bigint comment '最近三十日退款次数',
      refund_last_30d_num bigint comment '最近三十日退款件数',
      refund_last_30d_amount decimal(38,2) comment '最近三十日退款金额',
      refund_count bigint comment '累积退款次数',
      refund_num bigint comment '累积退款件数',
      refund_amount decimal(38,2) comment '累积退款金额',
      cart_last_30d_count bigint comment '最近30日被加入购物车次数',
      cart_last_30d_num bigint comment '最近30日被加入购物车件数',
      cart_count bigint comment '累积被加入购物车次数',
      cart_num bigint comment '累积被加入购物车件数',
      favor_last_30d_count bigint comment '最近30日被收藏次数',
      favor_count bigint comment '累积被收藏次数',
      evaluation_last_30d_good_count bigint comment '最近30日好评数',
      evaluation_last_30d_mid_count bigint comment '最近30日中评数',
      evaluation_last_30d_bad_count bigint comment '最近30日差评数',
      evaluation_good_count bigint comment '累积好评数',
      evaluation_mid_count bigint comment '累积中评数',
      evaluation_bad_count bigint comment '累积差评数'
    )
    COMMENT '商品主题宽表'
    ROW format delimited fields terminated BY '\t' 
    stored AS orc tblproperties ('orc.compress' = 'SNAPPY');
    
  • 计算总累计值

  • 计算最近30天
  • 合并处理

增量实现方案