一、补充:
1、hive的本地模式
可以在hive的conf下,创建一个.hiverc 在这个里面写配置文件。
复习:hive的配置文件的三种写法
hive.exec.mode.local.auto.inputbytes.max
如果 hive.exec.mode.local.auto 为 true,当输入文件大小小于此阈值时可以自动在本地模式运行,默认是 128兆。
134217728
hive.exec.mode.local.auto.input.files.max 当map小于这个任务数时启动本地模式。
默认值:
hive.exec.mode.local.auto=true (默认是false)
hive.exec.mode.local.auto.inputbytes.max=134217728(默认是128M)
hive.exec.mode.local.auto.input.files.max=4(默认是4)
set hive.exec.mode.local.auto=true 是打开 hive ⾃动判断是否启动本地模式的开关,但是只是打开这个参数并不能保证启动本地模式,要当
map 任务数不超过
hive.exec.mode.local.auto.input.files.max 的个数并且 map 输⼊⽂件⼤⼩不超过
hive.exec.mode.local.auto.inputbytes.max 所指定的⼤⼩时,才能启动本地模式。
2、case when 有两种用法,等值查询和非等值查询。
等值查询: select case sex when 1 then ‘male’ when 2 then ‘female’ else ‘mixed’ end as sexual,sex from tbl
select case
when price>100 then ‘expensive’ when price<=100 then ‘cheap’ else ‘异常值’ end as price,价格分类 from tbl
和sum一起使用:
:::danger
select
sum(case u.sex when 1 then 1 else 0 end)男性,
sum(case u.sex when 2 then 1 else 0 end)女性,
sum(case when u.sex <>1 and u.sex<>2 then 1 else 0 end)性别为空
from users u;
:::
二、ads的指标
先创建一个数据库 ads_nshop;
drop database ads_nshop cascade;
create database ads_nshop;
use ads_nshop;
1、流量类指标_平台统计表
按照性别,年龄,以及所在区域统计网站访问量以及人均访问数量
create external table if not exists ads_nshop.ads_nshop_platform_flow_stat(
customer_gender TINYINT COMMENT '性别:1男 0女',
age_range string COMMENT '年龄段',
customer_natives string COMMENT '所在地区',
visit_count int comment '访问量',
visit_avg_counts int comment '人均页面访问数'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_platform_flow_stat/'
分析:需要两个表:
dws_nshop.dws_nshop_ulog_view 和 customer 表进行关联
insert overwrite table ads_nshop.ads_nshop_platform_flow_stat partition(bdp_day='20220509')
select
b.customer_gender,
b.customer_age_range,
b.customer_natives,
sum(view_count),
sum(view_count)/count(a.user_id)
from
dws_nshop.dws_nshop_ulog_view a
join
ods_nshop.ods_02_customer b
on a.user_id = b.customer_id
where a.bdp_day='20220509'
group by
b.customer_gender,
b.customer_age_range,
b.customer_natives
2、流量类指标_平台流量统计P/UV
create external table if not exists ads_nshop.ads_nshop_flowpu_stat(
uv bigint comment '独立访客数',
pv bigint comment '页面访客数',
pv_avg double comment '人均页面访问数'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_flow/'
一个表就可以完成:dws_nshop.dws_nshop_ulog_view
insert overwrite table ads_nshop.ads_nshop_flowpu_stat partition(bdp_day='20220509')
select
count(distinct user_id),
sum(view_count),
sum(view_count)/count(distinct user_id)
from
dws_nshop.dws_nshop_ulog_view
where bdp_day='20220509';
3、平台搜索热词统计
create external table if not exists ads_nshop.ads_nshop_search_keys(
search_keys string comment '搜索内容',
gender string comment '性别',
age_range string comment '年龄段',
os string comment '手机系统',
manufacturer string comment '手机制造商',
area_code string comment '地区编码',
search_users int comment '此搜索内容用户数',
search_records int comment '此搜索内容查询次数',
search_orders string comment '查询排序',
search_targets int comment '查询目标数量'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_search_keys/';
分析: 需要customer 表,因为性别,年龄段。 需要dwd_nshop.dwd_nshop_actlog_pdtsearch 因为里面有搜索内容。
with t1 as (
select
b.target_keys,
a.customer_gender,
a.customer_age_range,
b.os ,
b.manufacturer,
b.carrier string,
b.area_code,
b.target_order,
b.target_id,
a.customer_id
from
ods_nshop.ods_02_customer a
join
dwd_nshop.dwd_nshop_actlog_pdtsearch b
on a.customer_id = b.user_id
where b.bdp_day='20220509'
)
insert overwrite table ads_nshop.ads_nshop_search_keys partition(bdp_day='20220509')
select
target_keys,
customer_gender,
customer_age_range,
os,
manufacturer,
area_code,
count(distinct customer_id),
count(customer_id),
target_order,
count(target_id)
from t1
group by
target_keys,
customer_gender,
customer_age_range,
os,
manufacturer,
area_code,
target_order;
4、用户类启动情况表
create external table if not exists ads_nshop.ads_nshop_actlog_launch_gsets(
os string comment '手机系统',
manufacturer string comment '手机制造商',
carrier string comment '电信运营商',
network_type string comment '网络类型',
area_code string comment '地区编码',
user_count int comment '用户数',
launch_count int comment '启动次数'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/actlog/ads_nshop_actlog_launch_gsets/'
分析:
insert overwrite table ads_nshop.ads_nshop_actlog_launch_gsets partition(bdp_day='20220509')
select
os,
manufacturer,
carrier,
network_type,
area_code,
count(distinct user_id),
sum(launch_count)
from
dws_nshop.dws_nshop_ulog_launch
where bdp_day='20220509'
group by
os,
manufacturer,
carrier,
network_type,
area_code;
多维分析的情况:
create external table if not exists ads_nshop.ads_nshop_actlog_launch_rollup(
os string comment '手机系统',
manufacturer string comment '手机制造商',
carrier string comment '电信运营商',
network_type string comment '网络类型',
area_code string comment '地区编码',
user_count int comment '用户数',
launch_count int comment '启动次数'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/actlog/ads_nshop_actlog_launch_rollup/'
insert overwrite table ads_nshop.ads_nshop_actlog_launch_rollup partition(bdp_day='20220509')
select
os,
manufacturer,
carrier,
network_type,
area_code,
count(distinct user_id),
sum(launch_count)
from
dws_nshop.dws_nshop_ulog_launch
where bdp_day='20220509'
group by
os,
manufacturer,
carrier,
network_type,
area_code
with rollup;
5、用户类指标统计表
create external table if not exists ads_nshop.ads_nshop_customer_stat(
customer_gender TINYINT COMMENT '性别:1男 0女',
age_range string COMMENT '年龄段',
customer_natives string COMMENT '所在地区',
total_counts int comment '总用户数',
add_counts int comment '新增用户数',
active_counts int comment '活跃用户数'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_customer_stat/'
可以使用用户表 ods_nshop.ods_02_customer 以及启动表dws_nshop.dws_nshop_ulog_launch。
用到了一个函数:select from_unixtime(cast(1572608342000/1000 as int),’yyyyMMdd’) ;
什么是新增用户?就是注册时间是某一天,对于某一天来讲,就是新增。
insert overwrite table ads_nshop.ads_nshop_customer_stat partition(bdp_day='20220509')
select
a.customer_gender,
a.customer_age_range,
a.customer_natives,
count(distinct a.customer_id) as total_counts,
count(case when from_unixtime(cast(a.customer_ctime/1000 as int),'yyyyMMdd')='20191102' then 1 else 0 end) as add_counts,
count(b.user_id) as active_counts
from ods_nshop.ods_02_customer a
join dws_nshop.dws_nshop_ulog_launch b
on a.customer_id = b.user_id
where b.bdp_day='20220509'
group by
a.customer_gender,
a.customer_age_range,
a.customer_natives;
6、总体运营指标统计表
create external table if not exists ads_nshop.ads_nshop_oper_stat(
customer_gender TINYINT COMMENT '性别:1男 0女',
age_range string COMMENT '年龄段',
customer_natives string COMMENT '所在地区',
product_type int comment '商品类别',
order_counts int comment '订单数',
order_rate double comment '下单率',
order_amounts int comment '销售总金额',
order_discounts int comment '优惠总金额',
shipping_amounts int comment '运费总金额',
per_customer_transaction int comment '客单价'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_oper_stat/'
分析:前面三个字段是用户表里的字段(customer),商品表(dim_pub_product),
订单数量等信息表示肯定跟订单有关(order_details),根据下单率应该推算出来有商品的浏览表(ulog_view)。
用户表:ods_nshop.ods_02_customer
商品表: ods_nshop.dim_pub_product
订单明细表:dwd_nshop.dwd_nshop_orders_details
浏览情况表:dws_nshop.dws_nshop_ulog_view
insert overwrite table ads_nshop.ads_nshop_oper_stat partition(bdp_day='20220509')
select
a.customer_gender,
a.customer_age_range,
a.customer_natives,
c.category_code,
count(distinct b.order_id),
count(distinct b.order_id)/sum(d.view_count),
sum(b.payment_money),
sum(b.district_money),
sum(b.shipping_money),
sum(b.payment_money)/count(distinct b.customer_id)
from
ods_nshop.ods_02_customer a
join
dwd_nshop.dwd_nshop_orders_details b
on a.customer_id =b.customer_id
join ods_nshop.dim_pub_product c
on b.product_code = c.product_code
join dws_nshop.dws_nshop_ulog_view d
on d.user_id = a.customer_id
group by
a.customer_gender,
a.customer_age_range,
a.customer_natives,
c.category_code
7、风控类指标统计表
create external table if not exists ads_nshop.ads_nshop_risk_mgt(
customer_gender TINYINT COMMENT '性别:1男 0女',
age_range string COMMENT '年龄段',
customer_natives string COMMENT '所在地区',
product_type int comment '商品类别',
start_complaint_counts int comment '发起投诉数',
complaint_rate float comment '投诉率'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_risk_mgt/';
分析:
用户表:ods_nshop.ods_02_customer
商品表: ods_nshop.dim_pub_product
/投诉表:dws_nshop.dws_nshop_user_complainant
投诉率= 投诉数量/订单总数/
关联订单明细表:dwd_nshop.dwd_nshop_orders_details
insert overwrite table ads_nshop.ads_nshop_risk_mgt partition(bdp_day='20220509')
select
a.customer_gender,
a.customer_age_range,
a.customer_natives,
c.category_code,
count(distinct case when b.order_status=6 then b.order_id end),
count(distinct case when b.order_status=6 then b.order_id end)/count(distinct b.order_id)
from
ods_nshop.ods_02_customer a
join
dwd_nshop.dwd_nshop_orders_details b
on a.customer_id =b.customer_id
join ods_nshop.dim_pub_product c
on b.product_code = c.product_code
where b.bdp_day='20220509'
group by
a.customer_gender,
a.customer_age_range,
a.customer_natives,
c.category_code;
8、支付类指标统计
create external table if not exists ads_nshop.ads_nshop_pay_stat_gsets(
customer_gender TINYINT COMMENT '性别:1男 0女',
age_range string COMMENT '年龄段',
customer_natives string COMMENT '所在地区',
pay_type string COMMENT '支付类型',
pay_nettype string COMMENT '支付网络方式',
pay_amounts int comment '支付金额',
pay_success int comment '支付成功数',
pay_fail int comment '支付失败数',
pay_order_counts int comment '支付订单数',
pay_user_counts int comment '支付用户数',
pay_product_counts int comment '支付商品数',
order_pay_amount_rate float comment '下单-支付金额转换率',
order_pay_user_rate float comment '下单-支付买家数转换率'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_pay_stat_gsets/';
分析:ods_nshop.ods_02_customer 表,dwd_nshop.dwd_nshop_orders_details 订单明细表,
ods_nshop.ods_02_orders_pay_records 支付记录明细表
count(某个字段),如果该字段有null值,那么就跳过。
insert overwrite table ads_nshop.ads_nshop_pay_stat_gsets partition(bdp_day='20220509')
select
a.customer_gender,
a.customer_age_range,
a.customer_natives,
b.pay_type,
b.pay_nettype,
sum(c.pay_amount),
count(case when c.pay_status='1' then 1 end),
count(case when c.pay_status='0' then 1 end),
count(distinct b.order_id),
count(distinct a.customer_id),
sum(b.product_cnt),
sum(case when c.pay_status='1' then c.pay_amount end)/sum(b.payment_money),
count(distinct case when c.pay_status='1' then c.customer_id end)/count(distinct b.customer_id)
from
ods_nshop.ods_02_customer a
join
dwd_nshop.dwd_nshop_orders_details b
on a.customer_id = b.customer_id
join
ods_nshop.ods_02_orders_pay_records c
on b.order_id = c.order_id
where b.bdp_day="20220509"
group by
a.customer_gender,
a.customer_age_range,
a.customer_natives,
b.pay_type,
b.pay_nettype
执行报错:
关闭本地模式:
set hive.exec.mode.local.auto=false;
9、支付统计TopN表
create external table if not exists ads_nshop.ads_nshop_pay_stat_topn(
pay_type string COMMENT '支付类型',
customer_area_code string COMMENT '所在地区',
pay_count int COMMENT '支付数量',
pay_amounts int comment '支付金额'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_pay_stat_topn/';
地区字段只有customer表有,所以需要customer表,剩余的都是支付的数据,使用支付明细表ods_nshop.ods_02_orders_pay_records
insert overwrite table ads_nshop.ads_nshop_pay_stat_topn partition(bdp_day='20220509')
select
b.pay_type,
a.customer_natives,
count(distinct b.pay_id) as pay_count,
sum(b.pay_amount) as pay_amount
from
ods_nshop.ods_02_customer a
join
ods_nshop.ods_02_orders_pay_records b
on a.customer_id = b.customer_id
group by
a.customer_natives,
b.pay_type
order by
pay_amount desc
limit 50;
10、交易类指标表
create external table if not exists ads_nshop.ads_nshop_busi_stat(
customer_gender TINYINT COMMENT '性别:1男 0女',
age_range string COMMENT '年龄段',
customer_area_code string COMMENT '所在地区',
category_type string COMMENT '商品类别',
supplier_type string COMMENT '店铺类别',
busi_succ_orders int comment '交易成功订单数',
busi_succ_amounts int comment '交易成功金额',
busi_succ_users int comment '交易成功买家数',
busi_succ_products int comment '交易成功商品数',
busi_fail_orders int comment '交易失败订单数',
busi_fail_amounts int comment '交易失败金额'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_busi_stat/';
有一个customer, 一个 订单详情表,商家表(供货商), 商品表
insert overwrite table ads_nshop.ads_nshop_busi_stat partition(bdp_day='20220509')
select
a.customer_gender,
a.customer_age_range,
a.customer_natives,
d.category_code,
c.supplier_type,
count(distinct case when b.order_status!=7 then b.order_id end),
sum(distinct case when b.order_status!=7 then b.payment_money end),
count(distinct case when b.order_status!=7 then b.customer_id end),
count(case when b.order_status!=7 then b.product_code end),
count(distinct case when b.order_status=7 then b.order_id end),
sum(distinct case when b.order_status=7 then b.payment_money end),
from
ods_nshop.ods_02_customer a
join
dwd_nshop.dwd_nshop_orders_details b
on a.customer_id = b.customer_id
join ods_nshop.dim_pub_supplier c
on b.supplier_code = c.supplier_code
join ods_nshop.dim_pub_product d
on b.product_code = d.product_code
where b.bdp_day='20220509'
group by
a.customer_gender,
a.customer_age_range,
a.customer_natives,
d.category_code,
c.supplier_type
11、交易TopN统计表
create external table if not exists ads_nshop.ads_nshop_busi_stat_topn(
customer_natives string COMMENT '所在地区',
category_type string COMMENT '商品类别',
supplier_type string COMMENT '店铺类别',
busi_succ_orders int comment '交易成功订单数',
busi_succ_amounts int comment '交易成功金额',
busi_succ_users int comment '交易成功买家数',
busi_succ_products int comment '交易成功商品数',
busi_fail_orders int comment '交易失败订单数',
busi_fail_amounts int comment '交易失败金额'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_busi_stat_topn/';
insert overwrite table ads_nshop.ads_nshop_busi_stat_topn partition(bdp_day='20220509')
select
a.customer_natives,
d.category_code,
c.supplier_type,
count(distinct case when b.order_status!=7 then b.order_id end),
sum(distinct case when b.order_status!=7 then b.payment_money end) as pay_money,
count(distinct case when b.order_status!=7 then b.customer_id end),
count(case when b.order_status!=7 then b.product_code end),
count(distinct case when b.order_status=7 then b.order_id end),
sum(distinct case when b.order_status=7 then b.payment_money end)
from
ods_nshop.ods_02_customer a
join
dwd_nshop.dwd_nshop_orders_details b
on a.customer_id = b.customer_id
join ods_nshop.dim_pub_supplier c
on b.supplier_code = c.supplier_code
join ods_nshop.dim_pub_product d
on b.product_code = d.product_code
where b.bdp_day='20220509'
group by
a.customer_natives,
d.category_code,
c.supplier_type
order by pay_money desc
limit 100;
12、广告投放类指标统计
create external table if not exists ads_nshop.ads_nshop_release_stat(
device_type string comment '设备类型',
os string comment '手机系统',
customer_gender TINYINT comment '性别:1男 0女',
age_range string comment '年龄段',
customer_natives string comment '所在地区',
release_sources string comment '投放渠道',
release_category string comment '投放浏览产品分类',
visit_total_customers int comment '总访客数',
visit_total_counts int comment '总访问次数'
) partitioned by (bdp_day string)
row format delimited fields terminated by ','
stored as textfile
location '/data/nshop/ads/operation/ads_nshop_release_stat/';
分析:
需要一个客户表以及dwd_nshop.dwd_nshop_releasedatas
insert overwrite table ads_nshop.ads_nshop_release_stat partition(bdp_day='20220509')
select
a.device_type,
a.os,
b.customer_gender,
b.customer_age_range,
b.customer_natives,
a.release_sources,
a.release_category,
count(distinct a.customer_id),
count(1)
from
dwd_nshop.dwd_nshop_releasedatas a
join
ods_nshop.ods_02_customer b
on a.customer_id = b.customer_id
where a.bdp_day='20220509'
group by
a.device_type,
a.os,
b.customer_gender,
b.customer_age_range,
b.customer_natives,
a.release_sources,
a.release_category