一、用户启动DWS
在运行之前,先创建数据库:
drop database dws_nshop cascade;
create database dws_nshop;
create external table if not exists dws_nshop.dws_nshop_ulog_launch(
user_id string comment '用户id',
device_num string comment '设备号',
device_type string comment '设备类型',
os string comment '手机系统',
os_version string comment '手机系统版本',
manufacturer string comment '手机制造商',
carrier string comment '电信运营商',
network_type string comment '网络类型',
area_code string comment '地区编码',
launch_count int comment '启动次数'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/user/dws_nshop_ulog_launch/'
insert overwrite table dws_nshop.dws_nshop_ulog_launch partition(bdp_day='20220509')
select
distinct user_id ,
device_num ,
device_type ,
os ,
os_version ,
manufacturer,
carrier ,
network_type,
area_code ,
count(user_id) over (partition by user_id) launch_count
from dwd_nshop.dwd_nshop_actlog_launch
where bdp_day="20220509";
二、用户浏览DWS
create external table if not exists dws_nshop.dws_nshop_ulog_view(
user_id string comment '用户id',
device_num string comment '设备号',
device_type string comment '设备类型',
os string comment '手机系统',
os_version string comment '手机系统版本',
manufacturer string comment '手机制造商',
carrier string comment '电信运营商',
network_type string comment '网络类型',
area_code string comment '地区编码',
view_count int comment '浏览次数'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/user/dws_nshop_ulog_view/'
insert overwrite table dws_nshop.dws_nshop_ulog_view partition(bdp_day='20220509')
select
distinct user_id ,
device_num ,
device_type ,
os ,
os_version ,
manufacturer,
carrier ,
network_type,
area_code ,
count(user_id) over(partition by user_id) view_count
from dwd_nshop.dwd_nshop_actlog_pdtview
where bdp_day="20220509";
三、用户查询dws
create external table if not exists dws_nshop.dws_nshop_ulog_search(
user_id string comment '用户id',
device_num string comment '设备号',
device_type string comment '设备类型',
os string comment '手机系统',
os_version string comment '手机系统版本',
manufacturer string comment '手机制造商',
carrier string comment '电信运营商',
network_type string comment '网络类型',
area_code string comment '地区编码',
search_count int comment '搜索次数'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/user/dws_nshop_ulog_search/'
insert overwrite table dws_nshop.dws_nshop_ulog_search partition(bdp_day='20220509')
select
distinct user_id ,
device_num ,
device_type ,
os ,
os_version ,
manufacturer,
carrier ,
network_type,
area_code ,
count(user_id) over(partition by user_id) search_count
from dwd_nshop.dwd_nshop_actlog_pdtsearch
where bdp_day="20220509"
四、用户关注dws
create external table if not exists dws_nshop.dws_nshop_ulog_comment(
user_id string comment '用户id',
device_num string comment '设备号',
device_type string comment '设备类型',
os string comment '手机系统',
os_version string comment '手机系统版本',
manufacturer string comment '手机制造商',
carrier string comment '电信运营商',
network_type string comment '网络类型',
area_code string comment '地区编码',
comment_count int comment '关注次数',-- 不去重
comment_target_count int comment '关注产品次数',--去重
ct bigint comment '产生时间'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/user/dws_nshop_ulog_comment/'
编写SQL语句:
insert overwrite table dws_nshop.dws_nshop_ulog_comment partition(bdp_day='20220509')
select
distinct user_id ,
device_num ,
device_type ,
os ,
os_version ,
manufacturer,
carrier ,
network_type,
area_code ,
count(user_id) over(partition by user_id) as comment_count,
count(distinct user_id) over(partition by user_id) as comment_target_count,
current_timestamp()
from dwd_nshop.dwd_actlog_product_comment
where bdp_day="20220509"
五、用户交易宽表
create external table if not exists dws_nshop.dws_nshop_user_orders(
user_id string comment '用户id',
customer_natives string comment '所在区域',
orders_count int comment '订单数量',
orders_pay DECIMAL(10,1) comment '订单金额',
orders_shipping DECIMAL(10,1) comment '订单运费金额',
orders_district DECIMAL(10,1) comment '订单优惠金额',
ct bigint comment '产生时间'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/user/dws_nshop_user_orders/'
发现这个里面的数据:customer_natives 是我们的customer表,剩余的是订单明细表
with t1 as (
select
a.customer_id,
a.district_money ,
a.shipping_money,
a.payment_money,
a.order_id,
b.customer_natives
from
dwd_nshop.dwd_nshop_orders_details a
join ods_nshop.ods_02_customer b
on a.customer_id = b.customer_id
where a.bdp_day = '20220509'
)
insert overwrite table dws_nshop.dws_nshop_user_orders partition(bdp_day='20220509')
select
customer_id,
customer_natives,
count(order_id) over(partition by customer_id),
sum(payment_money) over(partition by customer_id),
sum(shipping_money) over(partition by customer_id),
sum(district_money) over(partition by customer_id),
current_timestamp()
from
t1;
六、用户投诉宽表
create external table if not exists dws_nshop.dws_nshop_user_complainant(
user_id string comment '用户id',
area_code string comment '地区编码',
compl_orders_count int comment '订单数量',
compl_orders_pay DECIMAL(10,1) comment '订单金额',
compl_supplier_count int comment '商家数量',
ct bigint comment '产生时间'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/user/dws_nshop_user_complainant/'
订单状态为6的是投诉。
insert overwrite table dws_nshop.dws_nshop_user_complainant partition(bdp_day='20220509')
select
distinct a.customer_id,
b.customer_natives,
count(a.order_id) over(partition by a.customer_id),
sum(a.payment_money) over(partition by a.customer_id),
sum(a.supplier_code) over(partition by a.customer_id),
current_timestamp()
from
dwd_nshop.dwd_nshop_orders_details a
join ods_nshop.ods_02_customer b
on a.customer_id = b.customer_id
where a.bdp_day = '20220509' and a.order_status=6;
七、商家用户浏览宽表
create external table if not exists dws_nshop.dws_nshop_supplier_user(
supplier_id string comment '商家id',
supplier_type int comment '供应商类型:1.自营,2.官方 3其他',
view_count int comment '浏览次数',
comment_users int comment '关注人数',
comment_area_code int comment '关注地区数量',
ct bigint comment '产生时间'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/supplier/dws_nshop_supplier_user/'
首先必须有供应商这个表ods_nshop.dim_pub_supplier,用户浏览表dwd_nshop.dwd_nshop_actlog_pdtview 而我们的 用户浏览表只有一个target_id(访问页面的编号),
target_id 跟 dim_pub_page 这个表中的page_code 是直接关系,dim_pub_page 中有商品编号(page_target)
根据商品编号怎么知道供应商的编号?
寻找商品表(ods_nshop.dim_pub_product) 里面有一个供应商的编码(supplier_code)
insert overwrite table dws_nshop.dws_nshop_supplier_user partition(bdp_day='20220509')
select
d.supplier_code,
d.supplier_type,
count(1) as view_count,
count(distinct a.user_id) as comment_users,
count(distinct a.area_code) as comment_area_code,
current_timestamp()
from dwd_nshop.dwd_nshop_actlog_pdtview a
join ods_nshop.dim_pub_page b
on a.target_id= b.page_code
join ods_nshop.dim_pub_product c
on b.page_target = c.product_code
join ods_nshop.dim_pub_supplier d
on c.supplier_code= d.supplier_code
where bdp_day='20220509'
group by d.supplier_code,d.supplier_type;
八、商家日销售宽表
create external table if not exists dws_nshop.dws_nshop_supplier_sales(
supplier_id string comment '商家id',
supplier_type int comment '供应商类型:1.自营,2.官方 3其他',
sales_users int comment '购物人数',
sales_users_area int comment '购物地区数量',
sales_orders int comment '购物订单数',
salaes_orders_pay DECIMAL(10,1) comment '订单金额',
salaes_orders_district DECIMAL(10,1) comment '订单优惠金额',
ct bigint comment '产生时间'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/supplier/dws_nshop_supplier_sales/'
insert overwrite table dws_nshop.dws_nshop_supplier_sales partition(bdp_day='20220509')
select
a.supplier_code,
c.supplier_type,
count(distinct a.customer_id) sales_users,
count(distinct a.consignee_zipcode) as sales_users_area,
count(a.order_id) as sales_orders,
sum(a.payment_money) as salaes_orders_pay,
sum(a.district_money) as salaes_orders_district,
current_timestamp()
from dwd_nshop.dwd_nshop_orders_details a
join ods_nshop.dim_pub_product b
on a.product_code = b.product_code
join ods_nshop.dim_pub_supplier c
on c.supplier_code= b.supplier_code
where bdp_day='20220509'
group by a.supplier_code,c.supplier_type;
九、广告投放用户宽表
create external table if not exists dws_nshop.dws_nshop_release_user(
release_sources string comment '投放渠道',
release_category string comment '投放浏览产品分类',
release_users int comment '投放浏览用户数',
release_product_page int comment '投放浏览产品页面数',
ct bigint comment '创建时间'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/release/dws_nshop_release_user/'
insert overwrite table dws_nshop.dws_nshop_release_user partition(bdp_day='20220509')
select
release_sources,
release_category,
count(distinct customer_id),
count(*),
current_timestamp()
from
dwd_nshop.dwd_nshop_releasedatas
where bdp_day="20220509"
group by
release_sources,
release_category
十、用户营销活动宽表
create external table if not exists dws_nshop.dws_nshop_user_release(
user_id string comment '用户id',
os string comment '手机系统',
os_version string comment '手机系统版本',
manufacturer string comment '手机制造商',
carrier string comment '电信运营商',
network_type string comment '网络类型',
area_code string comment '地区编码',
source_count int comment '投放来源数量',
ct bigint comment '产生时间'
) partitioned by (bdp_day string)
stored as parquet
location '/data/nshop/dws/user/dws_nshop_user_release/';
insert overwrite table dws_nshop.dws_nshop_user_release partition(bdp_day='20220509')
select
a.customer_id,
a.os ,
a.os_version ,
a.manufacturer,
b.carrier,
b.network_type,
a.area_code ,
count(1) over (partition by a.release_sources),
current_timestamp()
from
dwd_nshop.dwd_nshop_releasedatas a
join ods_nshop.ods_nshop_01_useractlog b
on a.customer_id = b.customer_id;