--1.创建表
create table ods_order_info(
id string COMMENT '订单编号',
order_status string COMMENT '订单状态',
create_time string COMMENT '创建时间'
)partitioned by (dt string);
--模拟数据
insert into table ods_order_info partition(dt="2021-06-03") values("1","待支付","2021-06-03");
insert into table ods_order_info partition(dt="2021-06-03") values("2","待支付","2021-06-03");
insert into table ods_order_info partition(dt="2021-06-03") values("3","已支付","2021-06-03");
--分别模拟新增和修改数据
insert into table ods_order_info partition(dt="2021-06-04") values("1","已支付","2021-06-03");
insert into table ods_order_info partition(dt="2021-06-04") values("4","取消订单","2021-06-04");
--创建拉链表
create table dwd_order_info_his(
id string COMMENT '订单编号',
order_status string COMMENT '订单状态',
start_date string COMMENT '有效开始日期',
end_date string COMMENT '有效结束日期'
) COMMENT '订单拉链表'
stored as parquet
tblproperties ("parquet.compression"="snappy");
--创建临时拉链表
create table dwd_order_info_his_tmp like dwd_order_info_his;
--首先向拉链表中导入数据,第一此导入认为所有的数据都是合法的所以start_time是数据的start_time;end_time是9999-99-99
insert overwrite table dwd_order_info_his select id,order_status,dt,"9999-99-99" from ods_order_info where dt<="2021-06-03";
--新的一天导入变化的数据到拉链临时表
insert overwrite table dwd_order_info_his_tmp select id,order_status,dt,"9999-99-99" from ods_order_info where dt="2021-06-04";
--用拉链表和ods最新更新的数据left join,left是重点保证所有的拉链数据都存在,如果右表id是null 说明没有新增
--如果不是null 说明新增数据那么将end_time 改成当前时间-1天 需要注意的是必须加t2.end_date='9999-99-99' 否则可能会更新多条记录
insert overwrite into dwd_order_info_his_tmp
select t1.id,t1.order_status,t1.start_date as create_time ,if(t2.id is null,t1.end_date,"2021-06-04") as end_date from dwd_order_info_his t1 left join
(select * from ods_order_info where dt="2021-06-04") t2 on t1.id=t2.id and t2.end_date='9999-99-99'
union all
SELECT t3.id,t3.order_status,t3.create_time,'9999-99-99' as end_date FROM ods_order_info t3 where dt='2021-06-04'