数仓之全量表、增量表、快照表、切片表、拉链表

-- ODS shop table: comma-delimited fields, partitioned by the string column ds.
DROP TABLE IF EXISTS ods.shops;
CREATE TABLE ods.shops (
    `shopid`     INT    COMMENT '商铺ID'
  , `userid`     INT    COMMENT '商铺负责人'
  , `areaid`     INT    COMMENT '区域ID'
  , `shopname`   STRING COMMENT '商铺名称'
  , `shoplevel`  INT    COMMENT '商铺等级'
  , `status`     INT    COMMENT '商铺状态'
  , `createtime` STRING COMMENT '创建日期'
  , `modifytime` STRING COMMENT '修改日期'
)
COMMENT '商家信息表'
PARTITIONED BY (`ds` STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

../shops/shop-2022-09-19.dat
100050,1,100225,WSxxx营超市,1,1,2020-06-28,2020-07-01 13:22:22
100052,2,100236,新鲜xxx旗舰店,1,1,2020-06-28,2020-07-01 13:22:22
100053,3,100011,华为xxx旗舰店,1,1,2020-06-28,2020-07-01 13:22:22
100054,4,100159,小米xxx旗舰店,1,1,2020-06-28,2020-07-01 13:22:22
100055,5,100211,苹果xxx旗舰店,1,1,2020-06-28,2020-07-01 13:22:22

../shops/shop-2022-09-20.dat
100057,7,100311,三只xxx鼠零食,1,1,2020-06-28,2020-07-02 13:22:22
100058,8,100329,良子xxx铺美食,1,1,2020-06-28,2020-07-02 13:22:22
100054,4,100159,小米xxx旗舰店,2,1,2020-06-28,2020-07-02 13:22:22
100055,5,100211,苹果xxx旗舰店,2,1,2020-06-28,2020-07-02 13:22:22

-- Load the daily extract files into their matching ds partitions of ods.shops.
-- NOTE(review): the local paths look like placeholders; point them at the real
-- location of the .dat files before running.
load data local inpath '/2022-09-19.dat' into table ods.shops partition(ds='2022-09-19');
-- The ds='2022-09-20' partition is read by the incremental statements later in
-- this file, so it must be loaded too. The path mirrors the one above - confirm.
load data local inpath '/2022-09-20.dat' into table ods.shops partition(ds='2022-09-20');

2.创建拉链表

-- Zipper (history) table for shops: the same business columns as ods.shops
-- plus a start_date / end_date pair on every row.
DROP TABLE IF EXISTS dim.shops_his;
CREATE TABLE dim.shops_his (
    `shopid`     INT    COMMENT '商铺ID'
  , `userid`     INT    COMMENT '商铺负责人'
  , `areaid`     INT    COMMENT '区域ID'
  , `shopname`   STRING COMMENT '商铺名称'
  , `shoplevel`  INT    COMMENT '商铺等级'
  , `status`     INT    COMMENT '商铺状态'
  , `createtime` STRING COMMENT '创建日期'
  , `modifytime` STRING COMMENT '修改日期'
    -- The zipper table adds two columns: effective start date and expiry end date.
  , `start_date` STRING COMMENT '生效起始日期'
  , `end_date`   STRING COMMENT '失效结束日期'
)
COMMENT '商家信息表';

3.初始化拉链表

-- Initialize the zipper table from the ds='2022-09-19' partition of ods.shops.
-- Every source row becomes an open version (end_date = '9999-12-31').
-- INSERT OVERWRITE replaces the table's current contents.
insert overwrite table dim.shops_his
select
     shopid
    ,userid
    ,areaid
    ,shopname
    ,shoplevel
    ,status
    ,createtime
    ,modifytime
     -- start_date = first 10 characters of modifytime, falling back to
     -- createtime when modifytime is null. substr positions are 1-based,
     -- so the start index is 1 rather than 0.
    ,substr(coalesce(modifytime, createtime), 1, 10) as start_date
    ,'9999-12-31' as end_date
from ods.shops
where ds = '2022-09-19';

4.新增增量数据

-- Append the rows of the ds='2022-09-20' partition to the zipper table as new
-- open versions (end_date = '9999-12-31').
insert into table dim.shops_his
select
     shopid
    ,userid
    ,areaid
    ,shopname
    ,shoplevel
    ,status
    ,createtime
    ,modifytime
     -- start_date = first 10 characters of modifytime, falling back to
     -- createtime when modifytime is null (substr positions are 1-based).
     -- Aliases match the target column names start_date / end_date.
    ,substr(coalesce(modifytime, createtime), 1, 10) as start_date
    ,'9999-12-31' as end_date
from ods.shops
where ds = '2022-09-20';

5.对比上日变化数据关链操作

-- Close the chain: rewrite dim.shops_his so that an open version of a shop
-- that also appears in the ds='2022-09-20' increment gets its end_date set to
-- the previous day (2022-09-19). All other rows are carried over unchanged.
--
-- INSERT OVERWRITE is used so the closed row replaces the open one instead of
-- being appended beside it. All ten target columns are selected, including
-- start_date.
insert overwrite table dim.shops_his
select
     h.shopid
    ,h.userid
    ,h.areaid
    ,h.shopname
    ,h.shoplevel
    ,h.status
    ,h.createtime
    ,h.modifytime
    ,h.start_date
     -- If the shop is present in the increment and this version is still open,
     -- end it on the previous day; otherwise keep the existing end_date.
     -- The start_date comparison leaves the increment's own version open when
     -- it has already been appended to the history table.
     -- NOTE(review): assumes a new version always has a later start date than
     -- the version it replaces - confirm for same-day changes.
    ,case when i.shopid is not null
           and h.end_date = '9999-12-31'
           and h.start_date < substr(coalesce(i.modifytime, i.createtime), 1, 10)
          then cast(date_add('2022-09-20', -1) as string)
          else h.end_date
     end as end_date
from dim.shops_his h
left join (
    select
         shopid
        ,createtime
        ,modifytime
    from ods.shops
    where ds = '2022-09-20'
) i on h.shopid = i.shopid;