1. --1.创建表
    2. create table ods_order_info(
    3. id string COMMENT '订单编号',
    4. order_status string COMMENT '订单状态',
    5. create_time string COMMENT '创建时间'
    6. )partitioned by (dt string);
    7. --模拟数据
    8. insert into table ods_order_info partition(dt="2021-06-03") values("1","待支付","2021-06-03");
    9. insert into table ods_order_info partition(dt="2021-06-03") values("2","待支付","2021-06-03");
    10. insert into table ods_order_info partition(dt="2021-06-03") values("3","已支付","2021-06-03");
    11. --分别模拟新增和修改数据
    12. insert into table ods_order_info partition(dt="2021-06-04") values("1","已支付","2021-06-03");
    13. insert into table ods_order_info partition(dt="2021-06-04") values("4","取消订单","2021-06-04");
    14. --创建拉链表
    15. create table dwd_order_info_his(
    16. id string COMMENT '订单编号',
    17. order_status string COMMENT '订单状态',
    18. start_date string COMMENT '有效开始日期',
    19. end_date string COMMENT '有效结束日期'
    20. ) COMMENT '订单拉链表'
    21. stored as parquet
    22. tblproperties ("parquet.compression"="snappy");
    23. --创建临时拉链表
    24. create table dwd_order_info_his_tmp like dwd_order_info_his;
    25. --首先向拉链表中导入数据,第一此导入认为所有的数据都是合法的所以start_time是数据的start_time;end_time9999-99-99
    26. insert overwrite table dwd_order_info_his select id,order_status,dt,"9999-99-99" from ods_order_info where dt<="2021-06-03";
    27. --新的一天导入变化的数据到拉链临时表
    28. insert overwrite table dwd_order_info_his_tmp select id,order_status,dt,"9999-99-99" from ods_order_info where dt="2021-06-04";
    29. --用拉链表和ods最新更新的数据left joinleft是重点保证所有的拉链数据都存在,如果右表idnull 说明没有新增
    30. --如果不是null 说明新增数据那么将end_time 改成当前时间-1 需要注意的是必须加t2.end_date='9999-99-99' 否则可能会更新多条记录
    31. insert overwrite into dwd_order_info_his_tmp
    32. select t1.id,t1.order_status,t1.start_date as create_time ,if(t2.id is null,t1.end_date,"2021-06-04") as end_date from dwd_order_info_his t1 left join
    33. (select * from ods_order_info where dt="2021-06-04") t2 on t1.id=t2.id and t2.end_date='9999-99-99'
    34. union all
    35. SELECT t3.id,t3.order_status,t3.create_time,'9999-99-99' as end_date FROM ods_order_info t3 where dt='2021-06-04'