— 多行、多列 如何转换 (MN 转换成 NM)
— 1、先将多列合并成一行 (MN 转换成 M1)

一、需求1:行转列(一行转多行)

image.png
表名:row_column_conversion
转化为如下格式:
image.png
下面看下行转列使用的函数:
lateral view explode(split表达式) tableName as columnName

  • tableName 表示虚拟表的名称。
  • columnName 表示虚拟表的虚拟字段名称,如果分裂之后有一个列,则写一个即可;如果分裂之后有多个列,按照列的顺序在括号中声明所有虚拟列名,以逗号隔开。

Hive SQL 如下:
SELECT movie, actorsnew
FROM row_column_conversion
Lateral View explode(split(actors,”,”)) tmpTable as actors_new
— split(actors,”,”)相对字符串切割,得到数组
— actorsnew 是给 explode(split(actors,”,”)) 列起的别名

explode 函数:处理数组结构的字段,将数组转换成多行。
Lateral View:其实explode是一个UDTF函数(一行输入多行输出),这个时候如果要select除了explode得到的字段以外的多个字段,需要创建虚拟表
Lateral View 用于和UDTF函数【explode,split】结合来使用
首先通过 UDTF 函数将数据拆分成多行,再将多行结果组合成一个支持别名的虚拟表。
主要解决在 select 使用UDTF做查询的过程中查询只能包含单个UDTF,不能包含其它字段以及多个UDTF的情况。
语法:LATERAL VIEW udtf(expression) tableAlias AS columnAlias (‘,’ columnAlias)

除了以上的情形外,有时候,会经常会碰到一个场景,比如手上有一张用户表,记录了用户某款产品每一天各个功能的使用次数,存储方式类似key-value键值结构。具体如下:
image.png
此时,如果我们需要记录用户在每个功能的使用次数、占比的特征,这种情况就需要行转列的操作。
— 方法1:
SELECT
uid,
MAX(case WHEN fid = 10 then num else 0 END) as fid_10,
MAX(case WHEN fid = 11 then num else 0 END) as fid_11
FROM user_row_column_conversion GROUP BY uid

— 方法2:
SELECT
uid,
MAX(if(fid = 10,num,0)) as fid_10,
MAX(if(fid = 11,num,0)) as fid_11
FROM user_row_column_conversion GROUP BY uid

输出结果如下:
image.png

二、需求2:列转行(多行转一行)

在有的场景下,需要把多行合并。
常见场景:有用户的评论流水表,此时我们想对同一个用户的所有评论拼接到一起,然后做文本分类。
将如下数据:
image.png
转化为如下格式:
image.png
Hive SQL 如下:

  • collect_list:把列聚合成为数组,不去重
  • collect_set:把列聚合成为数据,去重
  • concat_ws(separator, str1, str2, …):把多个字符串用分隔符进行拼接

— 方法1:group by + concat_ws + collect_list
SELECT
movie,
concat_ws(“,”,collect_list(actors_new)) as actors
FROM temp_row_column_conversion
GROUP BY movie

group by分组后按组别合并字符,以 groupby+group_concat(<>,’,’) 的方式可以将多行转为一行显示。

— 方法2:group by + group_concat
SELECT
movie,
group_concat(DISTINCT(actors_new),”,”)as actors
FROM temp_row_column_conversion GROUP BY movie
发布于 2021-08-20 10:22

漏斗分析代码:
— 漏斗分析大宽表
— loan_money_his:指曾经逾期0+的金额 loan_money_now:截止到当前逾期0+的金额
drop table dm_finrisk.AA_etc_funnel_users;
create table dm_finrisk.AA_etc_funnel_users as
select a.sq_time,b.card_type,a.auid,b.buid,c.cuid,d.duid,d.loan_money_his,d.loan_money_now
from
(
select concat(substr(applyno,1,4),’-‘,substr(applyno,5,2)) as sq_time,uid as auid
from dm_finrisk.blaze_event_14012 where env is null
and uid is not null and applyno is not null
group by concat(substr(applyno,1,4),’-‘,substr(applyno,5,2)) ,uid
) a
left join
(
select substr(add_time,1,7) as open_time,uid as buid,card_type
from dwd.dwd_loan_uc_user_car_dc
where
status = 0 —状态:0表示正常,1表示删除
and is_delete = 0
—and card_type = “PREPAY_CARD”
—and write_off_time=’1900-01-01 00:00:00.0’
and process_status in (4,3) —流程状态:0-初始化 1-签产品说明书 2-车辆激活 3-开卡校验通过 4-开卡成功
and card_no is not null
and card_no!=’’ —这一条一定要加
and car_no is not null
and uid is not null
and add_time is not null
group by substr(add_time,1,7),uid,card_type
) b
on a.auid=b.buid

left join
(
select cc.consume_time,dd.uid as cuid
from
(
select
substr(add_time,1,7) as consume_time
,card_no
,traffic_record_no
from dwd.dwd_etc_credit_card_pass_df
where day=${zdt.addDay(-1).format(“yyyyMMdd”)}
and status in (‘PAYMENT_SUCCESS’,’SUCCESS’) and fail_reason=’’
group by substr(add_time,1,7),card_no,traffic_record_no
) cc
left join (select * from dwd.dwd_loan_uc_user_car_dc) dd
on cc.card_no=dd.card_no
group by cc.consume_time,dd.uid
) c
on a.auid=c.cuid

left join
(
select substr(accounting_start_date,1,7) as breake_time,uid as duid
,sum(if(overdue_status=’OVERDUE’,unpaid_total,0) ) as loan_money_now —当前逾期总金额
,sum(if(is_overdue=1 and overdue_days>=0,unpaid_total+paid_total,0)) as loan_money_his —历史0+金额

,count(distinct if(is_overdue=1,bill_no,null)) as overdue_count —曾逾期次数
,count(distinct if(is_overdue=1 and overdue_days>=7,bill_no,null)) as overdue_count_7 —曾逾期7+次数
,count(distinct if(is_overdue=1 and overdue_days>=15,bill_no,null)) as overdue_count_30 —曾逾期30+次数

from dm_finrisk.dm_rs_bill_base_df
where day=${zdt.addDay(-1).format(“yyyyMMdd”)}
and bill_flag=1
and uid is not null
and bill_type=2 — 指记账卡
and accounting_start_date is not null
group by substr(accounting_start_date,1,7),uid
) d
on a.auid=d.duid;

— 人数统计:
—select * from dm_finrisk.AA_etc_funnel_0214
drop table dm_finrisk.AA_etc_funnel_counts;
create table dm_finrisk.AA_etc_funnel_counts as
select sq_time,card_type
,count(distinct auid) as sq_uid
,count(distinct buid) as open_uid
,count(distinct cuid) as consume_uid
,count(distinct duid) as debet_uid
,count(distinct if(loan_money_his>0,duid,null)) as overdue_his_uid
,count(distinct if(loan_money_now>0,duid,null)) as overdue_now_uid
from dm_finrisk.AA_etc_funnel_users
where sq_time<>’-999-9’
group by sq_time,card_type;

— 转换率计算:
drop table dm_finrisk.AA_etc_funnel_result;
create table dm_finrisk.AA_etc_funnel_result as
select sq_time
,sum(distinct sq_uid) as total_uid
,sum(distinct if(card_type=’PREPAY_CARD’,open_uid,null)) as open_uid
,sum(distinct if(card_type=’PREPAY_CARD’,consume_uid,null)) as consume_uid
,sum(distinct if(card_type=’PREPAY_CARD’,debet_uid,null)) as debet_uid
,sum(distinct if(card_type=’PREPAY_CARD’,overdue_his_uid,null)) as overdue_his_uid
,sum(distinct if(card_type=’PREPAY_CARD’,overdue_now_uid,null)) as overdue_now_uid
— 总的转换率
,sum(distinct if(card_type=’PREPAY_CARD’,open_uid,null))/sum(distinct sq_uid) as open_ratio_all
,sum(distinct if(card_type=’PREPAY_CARD’,consume_uid,null))/sum(distinct sq_uid) as consume_ratio_all
,sum(distinct if(card_type=’PREPAY_CARD’,debet_uid,null))/sum(distinct sq_uid) as debet_ratio_all
,sum(distinct if(card_type=’PREPAY_CARD’,overdue_his_uid,null))/sum(distinct sq_uid) as his_overdue_ratio_all
,sum(distinct if(card_type=’PREPAY_CARD’,overdue_now_uid,null))/sum(distinct sq_uid) as now_overdue_ratio_all
— 环节转化率
,sum(distinct if(card_type=’PREPAY_CARD’,open_uid,null))/sum(distinct sq_uid) as open_ratio_at —开卡率
,sum(distinct if(card_type=’PREPAY_CARD’,consume_uid,null))/sum(distinct if(card_type=’PREPAY_CARD’,open_uid,null)) as consume_ratio_at —激活率
,sum(distinct if(card_type=’PREPAY_CARD’,debet_uid,null))/sum(distinct if(card_type=’PREPAY_CARD’,consume_uid,null)) as debet_ratio_at —击穿率
,sum(distinct if(card_type=’PREPAY_CARD’,overdue_his_uid,null))/sum(distinct if(card_type=’PREPAY_CARD’,debet_uid,null)) as his_overdue_ratio_at —历史逾期率
,sum(distinct if(card_type=’PREPAY_CARD’,overdue_now_uid,null))/sum(distinct if(card_type=’PREPAY_CARD’,debet_uid,null)) as now_overdue_ratio_at —当前逾期率

from dm_finrisk.AA_etc_funnel_counts
group by sq_time;

— 转换下格式

————————————————————————得到 人数漏斗 ——————————————————————-
— 1、先将多列合并成一行 (MN 转换成 M1)
drop table dm_finrisk.AA_0214_etc_1;
create table dm_finrisk.AA_0214_etc_1 as
select
sq_time
,concat(TOTAL_UID,’,’,OPEN_UID,’,’,CONSUME_UID,’,’,DEBET_UID,’,’,OVERDUE_HIS_UID,’,’,OVERDUE_NOW_UID) as dd
from dm_finrisk.AA_etc_funnel_result
;
—concat:先把多列合成一行

— 2、再将一行转换成多行 (M1 转换成 M1)
drop table dm_finrisk.AA_0214_etc_2;
create table dm_finrisk.AA_0214_etc_2 as
SELECT
sq_time,name
FROM dm_finrisk.AA_0214_etc_1
—Lateral View explode(split(“1”,”2”,”3”,’4’,’5’)) tableName as name ;
Lateral View explode(split(dd,”,”)) tableName as name ;
— Lateral View 把一行转换成多行 tableName是虚拟表不用变,name是新的列名

—3、加上一列序号:
drop table dm_finrisk.AA_0214_etc_3;
create table dm_finrisk.AA_0214_etc_3 as
select row_number() over(partition by sq_time order by sq_time desc) as id, —不要排序 以原顺序为主
a.*
from dm_finrisk.AA_0214_etc_2 a;

—4、加上类型名称得到最终结果
drop table dm_finrisk.AA_0214_etc_uid_ok;
create table dm_finrisk.AA_0214_etc_uid_ok as
select sq_time
,case when id=1 then ‘TOTAL_UID’
when id=2 then ‘OPEN_UID’
when id=3 then ‘CONSUME_UID’
when id=4 then ‘DEBET_UID’
when id=5 then ‘OVERDUE_HIS_UID’
when id=6 then ‘OVERDUE_NOW_UID’
end as var_name
,name as var_value
from dm_finrisk.AA_0214_etc_3;

————————————————————————得到 总体转换率漏斗 ——————————————————————-
— 1、先将多列合并成一行 (MN 转换成 M1)
drop table dm_finrisk.AA_0214_etc_11;
create table dm_finrisk.AA_0214_etc_11 as
select
sq_time
,concat(OPEN_RATIO_ALL,’,’,CONSUME_RATIO_ALL,’,’,DEBET_RATIO_ALL,’,’,HIS_OVERDUE_RATIO_ALL,’,’,NOW_OVERDUE_RATIO_ALL) as dd
from dm_finrisk.AA_etc_funnel_result
;
—concat:先把多列合成一行

— 2、再将一行转换成多行 (1M 转换成 M1)
drop table dm_finrisk.AA_0214_etc_22;
create table dm_finrisk.AA_0214_etc_22 as
SELECT
sq_time,name
FROM dm_finrisk.AA_0214_etc_11
—Lateral View explode(split(“1”,”2”,”3”,’4’,’5’)) tableName as name ;
Lateral View explode(split(dd,”,”)) tableName as name ;
— Lateral View 把一行转换成多行 tableName是虚拟表不用变,name是新的列名

—3、加上一列序号:
drop table dm_finrisk.AA_0214_etc_33;
create table dm_finrisk.AA_0214_etc_33 as
select row_number() over(partition by sq_time order by sq_time desc) as id, —不要排序 以原顺序为主
a.*
from dm_finrisk.AA_0214_etc_22 a;

—4、加上类型名称得到最终结果
drop table dm_finrisk.AA_0214_etc_allratio_ok;
create table dm_finrisk.AA_0214_etc_allratio_ok as
select sq_time
,case when id=1 then ‘OPEN_RATIO_ALL’
when id=2 then ‘CONSUME_RATIO_ALL’
when id=3 then ‘DEBET_RATIO_ALL’
when id=4 then ‘HIS_OVERDUE_RATIO_ALL’
when id=5 then ‘NOW_OVERDUE_RATIO_ALL’
end as var_name
,name as var_value
from dm_finrisk.AA_0214_etc_33;

————————————————————————得到 节点转换率漏斗 ——————————————————————-
— 1、先将多列合并成一行 (MN 转换成 M1)
drop table dm_finrisk.AA_0214_etc_111;
create table dm_finrisk.AA_0214_etc_111 as
select
sq_time
,concat(OPEN_RATIO_AT,’,’,CONSUME_RATIO_AT,’,’,DEBET_RATIO_AT,’,’,HIS_OVERDUE_RATIO_AT,’,’,NOW_OVERDUE_RATIO_AT) as dd
from dm_finrisk.AA_etc_funnel_result
;
—concat:先把多列合成一行

— 2、再将一行转换成多行 (M1 转换成 M1)
drop table dm_finrisk.AA_0214_etc_222;
create table dm_finrisk.AA_0214_etc_222 as
SELECT
sq_time,name
FROM dm_finrisk.AA_0214_etc_111
—Lateral View explode(split(“1”,”2”,”3”,’4’,’5’)) tableName as name ;
Lateral View explode(split(dd,”,”)) tableName as name ;
— Lateral View 把一行转换成多行 tableName是虚拟表不用变,name是新的列名

—3、加上一列序号:
drop table dm_finrisk.AA_0214_etc_333;
create table dm_finrisk.AA_0214_etc_333 as
select row_number() over(partition by sq_time order by sq_time desc) as id, —不要排序 以原顺序为主
a.*
from dm_finrisk.AA_0214_etc_222 a;

—4、加上类型名称得到最终结果
drop table dm_finrisk.AA_0214_etc_atratio_ok;
create table dm_finrisk.AA_0214_etc_atratio_ok as
select sq_time
,case when id=1 then ‘OPEN_RATIO_AT’
when id=2 then ‘CONSUME_RATIO_AT’
when id=3 then ‘DEBET_RATIO_AT’
when id=4 then ‘HIS_OVERDUE_RATIO_AT’
when id=5 then ‘NOW_OVERDUE_RATIO_AT’
end as var_name
,name as var_value
from dm_finrisk.AA_0214_etc_333;