--1.创建表create table ods_order_info( id string COMMENT '订单编号', order_status string COMMENT '订单状态', create_time string COMMENT '创建时间')partitioned by (dt string);--模拟数据insert into table ods_order_info partition(dt="2021-06-03") values("1","待支付","2021-06-03");insert into table ods_order_info partition(dt="2021-06-03") values("2","待支付","2021-06-03");insert into table ods_order_info partition(dt="2021-06-03") values("3","已支付","2021-06-03");--分别模拟新增和修改数据insert into table ods_order_info partition(dt="2021-06-04") values("1","已支付","2021-06-03");insert into table ods_order_info partition(dt="2021-06-04") values("4","取消订单","2021-06-04");--创建拉链表create table dwd_order_info_his( id string COMMENT '订单编号', order_status string COMMENT '订单状态', start_date string COMMENT '有效开始日期', end_date string COMMENT '有效结束日期') COMMENT '订单拉链表'stored as parquettblproperties ("parquet.compression"="snappy");--创建临时拉链表create table dwd_order_info_his_tmp like dwd_order_info_his;--首先向拉链表中导入数据,第一此导入认为所有的数据都是合法的所以start_time是数据的start_time;end_time是9999-99-99insert overwrite table dwd_order_info_his select id,order_status,dt,"9999-99-99" from ods_order_info where dt<="2021-06-03";--新的一天导入变化的数据到拉链临时表insert overwrite table dwd_order_info_his_tmp select id,order_status,dt,"9999-99-99" from ods_order_info where dt="2021-06-04";--用拉链表和ods最新更新的数据left join,left是重点保证所有的拉链数据都存在,如果右表id是null 说明没有新增--如果不是null 说明新增数据那么将end_time 改成当前时间-1天 需要注意的是必须加t2.end_date='9999-99-99' 否则可能会更新多条记录insert overwrite into dwd_order_info_his_tmp select t1.id,t1.order_status,t1.start_date as create_time ,if(t2.id is null,t1.end_date,"2021-06-04") as end_date from dwd_order_info_his t1 left join (select * from ods_order_info where dt="2021-06-04") t2 on t1.id=t2.id and t2.end_date='9999-99-99'union allSELECT t3.id,t3.order_status,t3.create_time,'9999-99-99' as end_date FROM ods_order_info t3 where dt='2021-06-04'