一、用户启动DWS

在运行之前,先创建数据库:

  1. drop database dws_nshop cascade;
  2. create database dws_nshop;
  1. create external table if not exists dws_nshop.dws_nshop_ulog_launch(
  2. user_id string comment '用户id',
  3. device_num string comment '设备号',
  4. device_type string comment '设备类型',
  5. os string comment '手机系统',
  6. os_version string comment '手机系统版本',
  7. manufacturer string comment '手机制造商',
  8. carrier string comment '电信运营商',
  9. network_type string comment '网络类型',
  10. area_code string comment '地区编码',
  11. launch_count int comment '启动次数'
  12. ) partitioned by (bdp_day string)
  13. stored as parquet
  14. location '/data/nshop/dws/user/dws_nshop_ulog_launch/'
  1. insert overwrite table dws_nshop.dws_nshop_ulog_launch partition(bdp_day='20220509')
  2. select
  3. distinct user_id ,
  4. device_num ,
  5. device_type ,
  6. os ,
  7. os_version ,
  8. manufacturer,
  9. carrier ,
  10. network_type,
  11. area_code ,
  12. count(user_id) over (partition by user_id) launch_count
  13. from dwd_nshop.dwd_nshop_actlog_launch
  14. where bdp_day="20220509";

二、用户浏览DWS

  1. create external table if not exists dws_nshop.dws_nshop_ulog_view(
  2. user_id string comment '用户id',
  3. device_num string comment '设备号',
  4. device_type string comment '设备类型',
  5. os string comment '手机系统',
  6. os_version string comment '手机系统版本',
  7. manufacturer string comment '手机制造商',
  8. carrier string comment '电信运营商',
  9. network_type string comment '网络类型',
  10. area_code string comment '地区编码',
  11. view_count int comment '浏览次数'
  12. ) partitioned by (bdp_day string)
  13. stored as parquet
  14. location '/data/nshop/dws/user/dws_nshop_ulog_view/'
  1. insert overwrite table dws_nshop.dws_nshop_ulog_view partition(bdp_day='20220509')
  2. select
  3. distinct user_id ,
  4. device_num ,
  5. device_type ,
  6. os ,
  7. os_version ,
  8. manufacturer,
  9. carrier ,
  10. network_type,
  11. area_code ,
  12. count(user_id) over(partition by user_id) view_count
  13. from dwd_nshop.dwd_nshop_actlog_pdtview
  14. where bdp_day="20220509";

三、用户查询dws

  1. create external table if not exists dws_nshop.dws_nshop_ulog_search(
  2. user_id string comment '用户id',
  3. device_num string comment '设备号',
  4. device_type string comment '设备类型',
  5. os string comment '手机系统',
  6. os_version string comment '手机系统版本',
  7. manufacturer string comment '手机制造商',
  8. carrier string comment '电信运营商',
  9. network_type string comment '网络类型',
  10. area_code string comment '地区编码',
  11. search_count int comment '搜索次数'
  12. ) partitioned by (bdp_day string)
  13. stored as parquet
  14. location '/data/nshop/dws/user/dws_nshop_ulog_search/'
  1. insert overwrite table dws_nshop.dws_nshop_ulog_search partition(bdp_day='20220509')
  2. select
  3. distinct user_id ,
  4. device_num ,
  5. device_type ,
  6. os ,
  7. os_version ,
  8. manufacturer,
  9. carrier ,
  10. network_type,
  11. area_code ,
  12. count(user_id) over(partition by user_id) search_count
  13. from dwd_nshop.dwd_nshop_actlog_pdtsearch
  14. where bdp_day="20220509"

四、用户关注dws

  1. create external table if not exists dws_nshop.dws_nshop_ulog_comment(
  2. user_id string comment '用户id',
  3. device_num string comment '设备号',
  4. device_type string comment '设备类型',
  5. os string comment '手机系统',
  6. os_version string comment '手机系统版本',
  7. manufacturer string comment '手机制造商',
  8. carrier string comment '电信运营商',
  9. network_type string comment '网络类型',
  10. area_code string comment '地区编码',
  11. comment_count int comment '关注次数',-- 不去重
  12. comment_target_count int comment '关注产品次数',--去重
  13. ct bigint comment '产生时间'
  14. ) partitioned by (bdp_day string)
  15. stored as parquet
  16. location '/data/nshop/dws/user/dws_nshop_ulog_comment/'

编写SQL语句:

  1. insert overwrite table dws_nshop.dws_nshop_ulog_comment partition(bdp_day='20220509')
  2. select
  3. distinct user_id ,
  4. device_num ,
  5. device_type ,
  6. os ,
  7. os_version ,
  8. manufacturer,
  9. carrier ,
  10. network_type,
  11. area_code ,
  12. count(user_id) over(partition by user_id) as comment_count,
  13. count(distinct user_id) over(partition by user_id) as comment_target_count,
  14. current_timestamp()
  15. from dwd_nshop.dwd_actlog_product_comment
  16. where bdp_day="20220509"

五、用户交易宽表

  1. create external table if not exists dws_nshop.dws_nshop_user_orders(
  2. user_id string comment '用户id',
  3. customer_natives string comment '所在区域',
  4. orders_count int comment '订单数量',
  5. orders_pay DECIMAL(10,1) comment '订单金额',
  6. orders_shipping DECIMAL(10,1) comment '订单运费金额',
  7. orders_district DECIMAL(10,1) comment '订单优惠金额',
  8. ct bigint comment '产生时间'
  9. ) partitioned by (bdp_day string)
  10. stored as parquet
  11. location '/data/nshop/dws/user/dws_nshop_user_orders/'

发现这个里面的数据:customer_natives 是我们的customer表,剩余的是订单明细表

  1. with t1 as (
  2. select
  3. a.customer_id,
  4. a.district_money ,
  5. a.shipping_money,
  6. a.payment_money,
  7. a.order_id,
  8. b.customer_natives
  9. from
  10. dwd_nshop.dwd_nshop_orders_details a
  11. join ods_nshop.ods_02_customer b
  12. on a.customer_id = b.customer_id
  13. where a.bdp_day = '20220509'
  14. )
  15. insert overwrite table dws_nshop.dws_nshop_user_orders partition(bdp_day='20220509')
  16. select
  17. customer_id,
  18. customer_natives,
  19. count(order_id) over(partition by customer_id),
  20. sum(payment_money) over(partition by customer_id),
  21. sum(shipping_money) over(partition by customer_id),
  22. sum(district_money) over(partition by customer_id),
  23. current_timestamp()
  24. from
  25. t1;

六、用户投诉宽表

  1. create external table if not exists dws_nshop.dws_nshop_user_complainant(
  2. user_id string comment '用户id',
  3. area_code string comment '地区编码',
  4. compl_orders_count int comment '订单数量',
  5. compl_orders_pay DECIMAL(10,1) comment '订单金额',
  6. compl_supplier_count int comment '商家数量',
  7. ct bigint comment '产生时间'
  8. ) partitioned by (bdp_day string)
  9. stored as parquet
  10. location '/data/nshop/dws/user/dws_nshop_user_complainant/'

订单状态为6的是投诉。

  1. insert overwrite table dws_nshop.dws_nshop_user_complainant partition(bdp_day='20220509')
  2. select
  3. distinct a.customer_id,
  4. b.customer_natives,
  5. count(a.order_id) over(partition by a.customer_id),
  6. sum(a.payment_money) over(partition by a.customer_id),
  7. sum(a.supplier_code) over(partition by a.customer_id),
  8. current_timestamp()
  9. from
  10. dwd_nshop.dwd_nshop_orders_details a
  11. join ods_nshop.ods_02_customer b
  12. on a.customer_id = b.customer_id
  13. where a.bdp_day = '20220509' and a.order_status=6;

七、商家用户浏览宽表

  1. create external table if not exists dws_nshop.dws_nshop_supplier_user(
  2. supplier_id string comment '商家id',
  3. supplier_type int comment '供应商类型:1.自营,2.官方 3其他',
  4. view_count int comment '浏览次数',
  5. comment_users int comment '关注人数',
  6. comment_area_code int comment '关注地区数量',
  7. ct bigint comment '产生时间'
  8. ) partitioned by (bdp_day string)
  9. stored as parquet
  10. location '/data/nshop/dws/supplier/dws_nshop_supplier_user/'

首先必须有供应商这个表ods_nshop.dim_pub_supplier,用户浏览表dwd_nshop.dwd_nshop_actlog_pdtview 而我们的 用户浏览表只有一个target_id(访问页面的编号),
target_id 跟 dim_pub_page 这个表中的page_code 是直接关系,dim_pub_page 中有商品编号(page_target)
根据商品编号怎么知道供应商的编号?
寻找商品表(ods_nshop.dim_pub_product) 里面有一个供应商的编码(supplier_code)
Day03-项目第三天DWS层数据整理 - 图1

  1. insert overwrite table dws_nshop.dws_nshop_supplier_user partition(bdp_day='20220509')
  2. select
  3. d.supplier_code,
  4. d.supplier_type,
  5. count(1) as view_count,
  6. count(distinct a.user_id) as comment_users,
  7. count(distinct a.area_code) as comment_area_code,
  8. current_timestamp()
  9. from dwd_nshop.dwd_nshop_actlog_pdtview a
  10. join ods_nshop.dim_pub_page b
  11. on a.target_id= b.page_code
  12. join ods_nshop.dim_pub_product c
  13. on b.page_target = c.product_code
  14. join ods_nshop.dim_pub_supplier d
  15. on c.supplier_code= d.supplier_code
  16. where bdp_day='20220509'
  17. group by d.supplier_code,d.supplier_type;

八、商家日销售宽表

  1. create external table if not exists dws_nshop.dws_nshop_supplier_sales(
  2. supplier_id string comment '商家id',
  3. supplier_type int comment '供应商类型:1.自营,2.官方 3其他',
  4. sales_users int comment '购物人数',
  5. sales_users_area int comment '购物地区数量',
  6. sales_orders int comment '购物订单数',
  7. salaes_orders_pay DECIMAL(10,1) comment '订单金额',
  8. salaes_orders_district DECIMAL(10,1) comment '订单优惠金额',
  9. ct bigint comment '产生时间'
  10. ) partitioned by (bdp_day string)
  11. stored as parquet
  12. location '/data/nshop/dws/supplier/dws_nshop_supplier_sales/'
  1. insert overwrite table dws_nshop.dws_nshop_supplier_sales partition(bdp_day='20220509')
  2. select
  3. a.supplier_code,
  4. c.supplier_type,
  5. count(distinct a.customer_id) sales_users,
  6. count(distinct a.consignee_zipcode) as sales_users_area,
  7. count(a.order_id) as sales_orders,
  8. sum(a.payment_money) as salaes_orders_pay,
  9. sum(a.district_money) as salaes_orders_district,
  10. current_timestamp()
  11. from dwd_nshop.dwd_nshop_orders_details a
  12. join ods_nshop.dim_pub_product b
  13. on a.product_code = b.product_code
  14. join ods_nshop.dim_pub_supplier c
  15. on c.supplier_code= b.supplier_code
  16. where bdp_day='20220509'
  17. group by a.supplier_code,c.supplier_type;

九、广告投放用户宽表

  1. create external table if not exists dws_nshop.dws_nshop_release_user(
  2. release_sources string comment '投放渠道',
  3. release_category string comment '投放浏览产品分类',
  4. release_users int comment '投放浏览用户数',
  5. release_product_page int comment '投放浏览产品页面数',
  6. ct bigint comment '创建时间'
  7. ) partitioned by (bdp_day string)
  8. stored as parquet
  9. location '/data/nshop/dws/release/dws_nshop_release_user/'
  1. insert overwrite table dws_nshop.dws_nshop_release_user partition(bdp_day='20220509')
  2. select
  3. release_sources,
  4. release_category,
  5. count(distinct customer_id),
  6. count(*),
  7. current_timestamp()
  8. from
  9. dwd_nshop.dwd_nshop_releasedatas
  10. where bdp_day="20220509"
  11. group by
  12. release_sources,
  13. release_category

十、用户营销活动宽表

  1. create external table if not exists dws_nshop.dws_nshop_user_release(
  2. user_id string comment '用户id',
  3. os string comment '手机系统',
  4. os_version string comment '手机系统版本',
  5. manufacturer string comment '手机制造商',
  6. carrier string comment '电信运营商',
  7. network_type string comment '网络类型',
  8. area_code string comment '地区编码',
  9. source_count int comment '投放来源数量',
  10. ct bigint comment '产生时间'
  11. ) partitioned by (bdp_day string)
  12. stored as parquet
  13. location '/data/nshop/dws/user/dws_nshop_user_release/';
  1. insert overwrite table dws_nshop.dws_nshop_user_release partition(bdp_day='20220509')
  2. select
  3. a.customer_id,
  4. a.os ,
  5. a.os_version ,
  6. a.manufacturer,
  7. b.carrier,
  8. b.network_type,
  9. a.area_code ,
  10. count(1) over (partition by a.release_sources),
  11. current_timestamp()
  12. from
  13. dwd_nshop.dwd_nshop_releasedatas a
  14. join ods_nshop.ods_nshop_01_useractlog b
  15. on a.customer_id = b.customer_id;