ODS 层的设计要点如下:

  • ODS 层的表结构设计受业务系统同步过来的数据结构
  • ODS 层保存全部历史数据,所以压缩格式选择压缩高 ( gzip )
  • ODS 层表名的命名规范:ods表名单分区增量全量标识( inc / full )

日志表

日志数据格式

  1. hadoop fs -cat /origin_data/gmall/log/topic_log/2020-06-14/* | zcat

ADS 层 - 图1

  1. {
  2. "common": { -- 环境信息
  3. "ar": "440000", -- 地区编码
  4. "ba": "Xiaomi", -- 手机品牌
  5. "ch": "vivo", -- 渠道
  6. "is_new": "1", -- 是否首日使用,首次使用的当日,该字段值为1,过了24:00,该字段置为0
  7. "md": "Xiaomi 9", -- 手机型号
  8. "mid": "mid_625416", -- 设备id
  9. "os": "Android 11.0", -- 操作系统
  10. "uid": "209", -- 会员id
  11. "vc": "v2.1.111" -- app版本号
  12. },
  13. "page": { -- 页面信息
  14. "during_time": 2815, -- 持续时间毫秒
  15. "item": "28", -- 目标id
  16. "item_type": "sku_id", -- 目标类型
  17. "last_page_id": "register", -- 上页类型
  18. "page_id": "good_detail", -- 页面ID
  19. "source_type": "promotion" -- 来源类型
  20. },
  21. "actions": [ -- 动作(事件)
  22. {
  23. "action_id": "get_coupon", -- 动作id
  24. "item": "2", -- 目标id
  25. "item_type": "coupon_id", -- 目标类型
  26. "ts": 1592144742407 -- 动作时间戳
  27. }
  28. ],
  29. "displays": [ -- 曝光信息
  30. {
  31. "display_type": "promotion", -- 曝光类型
  32. "item": "32", -- 曝光对象id
  33. "item_type": "sku_id", -- 曝光对象类型
  34. "order": 1, -- 出现顺序
  35. "pos_id": 1 -- 曝光位置
  36. }
  37. ],
  38. "start": { -- 启动日志
  39. "entry": "icon", -- icon 手机图标 notice 通知 install 安装后启动
  40. "loading_time": 18803, -- 启动加载时间
  41. "open_ad_id": 7, -- 广告页ID
  42. "open_ad_ms": 3449, -- 广告总共播放时间
  43. "open_ad_skip_ms": 1989 -- 用户跳过广告时点
  44. },
  45. "err": { -- 错误
  46. "error_code": "1234", -- 错误码
  47. "msg": "***********" -- 错误信息
  48. },
  49. "ts": 1592144741000 -- 跳入时间戳
  50. }

建表数据

  1. -- 删除表
  2. DROP TABLE IF EXISTS ods_log_inc;
  3. -- 创建外部表
  4. CREATE EXTERNAL TABLE ods_log_inc
  5. (
  6. `common` STRUCT<ar :STRING, ba :STRING, ch :STRING, is_new :STRING, md :STRING, mid :STRING, os :STRING, uid
  7. :STRING, vc :STRING> COMMENT '公共信息',
  8. `page` STRUCT<during_time :BIGINT, item :STRING, item_type :STRING, last_page_id :STRING, page_id :STRING,
  9. source_type :STRING> COMMENT '页面信息',
  10. `actions` ARRAY<STRUCT<action_id:STRING, item:STRING, item_type:STRING, ts:BIGINT>> COMMENT '动作信息',
  11. `displays` ARRAY<STRUCT<display_type :STRING, item :STRING, item_type :STRING, `order` :STRING, pos_id
  12. :STRING>> COMMENT '曝光信息',
  13. `start` STRUCT<entry :STRING, loading_time :BIGINT, open_ad_id :BIGINT, open_ad_ms :BIGINT, open_ad_skip_ms
  14. :BIGINT> COMMENT '启动信息',
  15. `err` STRUCT<error_code:BIGINT, msg:STRING> COMMENT '错误信息',
  16. `ts` BIGINT COMMENT '时间戳'
  17. ) COMMENT '活动信息表'
  18. PARTITIONED BY (`dt` STRING)
  19. ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
  20. LOCATION '/warehouse/gmall/ods/ods_log_inc/';

数据装载

load data inpath '/origin_data/gmall/log/topic_log/2020-06-14'
    into table ods_log_inc
    partition (dt = '2020-06-14');

查询数据

-- 查询数据

select common,
       page,
       actions,
       displays,
       `start`,
       err,
       ts,
       dt
from ods_log_inc
where dt = '2020-06-14';

ADS 层 - 图2

每日数据装载脚本

在 cpu101 的 /home/cpu/bin 目录下创建 hdfs_to_ods_log.sh

vim hdfs_to_ods_log.sh

内容 :

#!/bin/bash

# 定义变量方便修改
APP=gmall

# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=`date -d "-l day" +%F`
fi

echo ================== 日志日期为 $do_date ==================
sql="load data inpath '/origin_data/$APP/log/topic_log/$do_date' 
into table ${APP}.ods_log_inc partition(dt='$do_date');"

hive -e "$sql"

ADS 层 - 图3

增加脚本执行权限

chmod 777 hdfs_to_ods_log.sh

ADS 层 - 图4

脚本用法 :

hdfs_to_ods_log.sh 2020-06-14

业务表

后台管理系统 :

ADS 层 - 图5

全量表

活动信息表

ADS 层 - 图6

-- 删除 活动信息表
DROP TABLE IF EXISTS ods_activity_info_full;

-- 创建外部 活动信息表
CREATE EXTERNAL TABLE ods_activity_info_full(
    `id`            STRING COMMENT '活动id',
    `activity_name` STRING COMMENT '活动名称',
    `activity_type` STRING COMMENT '活动类型',
    `activity_desc` STRING COMMENT '活动描述',
    `start_time`    STRING COMMENT '开始时间',
    `end_time`      STRING COMMENT '结束时间',
    `create_time`   STRING COMMENT '创建时间'
) COMMENT '活动信息表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    NULL DEFINED AS ''
    LOCATION '/warehouse/gmall/ods/ods_activity_info_full/';

装载该分区数据

-- 装载数据

load data inpath '/origin_data/gmall/db/activity_info_full/2020-06-14'
    into table ods_activity_info_full
    partition (dt = '2020-06-14');

查询活动信息表

-- 查询 活动信息表

select id,
       activity_name,
       activity_type,
       activity_desc,
       start_time,
       end_time,
       create_time,
       dt
from ods_activity_info_full
where dt = '2020-06-14';

ADS 层 - 图7

活动规则表

ADS 层 - 图8

-- 删除 活动规则表
DROP TABLE IF EXISTS ods_activity_rule_full;

-- 创建外部 活动规则表
CREATE EXTERNAL TABLE ods_activity_rule_full(
    `id`               STRING COMMENT '编号',
    `activity_id`      STRING COMMENT '类型',
    `activity_type`    STRING COMMENT '活动类型',
    `condition_amount` DECIMAL(16, 2) COMMENT '满减金额',
    `condition_num`    BIGINT COMMENT '满减件数',
    `benefit_amount`   DECIMAL(16, 2) COMMENT '优惠金额',
    `benefit_discount` DECIMAL(16, 2) COMMENT '优惠折扣',
    `benefit_level`    STRING COMMENT '优惠级别'
) COMMENT '活动规则表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    NULL DEFINED AS ''
    LOCATION '/warehouse/gmall/ods/ods_activity_rule_full/';

装载该分区数据

-- 装载数据

load data inpath '/origin_data/gmall/db/activity_rule_full/2020-06-14'
    into table ods_activity_rule_full
    partition (dt = '2020-06-14');

查询数据

-- 查询数据

select id,
       activity_id,
       activity_type,
       condition_amount,
       condition_num,
       benefit_amount,
       benefit_discount,
       benefit_level,
       dt
from ods_activity_rule_full
where dt = '2020-06-14';

ADS 层 - 图9

一级品类表

ADS 层 - 图10

-- 删除 一级品类表

drop table if exists ods_base_category1_full;

-- 创建 一级品类表

create external table ods_base_category1_full (
    `id` string comment '编号',
    `name` string comment '分类名称'
) comment '一级品类表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
    null defined as ''
    location '/warehouse/gmall/ods/ods_base_category1_full/';

装载数据

-- 装载数据
load data inpath '/origin_data/gmall/db/base_category1_full/2020-06-14'
    into table ods_base_category1_full
    partition (dt = '2020-06-14');

查询数据

-- 查询数据

select id,
       name,
       dt
from ods_base_category1_full
where dt = '2020-06-14';

ADS 层 - 图11

二级品类表

ADS 层 - 图12

-- 删除 二级品类表
drop table if exists ods_base_category2_full;

-- 创建 二级品类表
create external table ods_base_category2_full
(
    `id`           string comment '编号',
    `name`         string comment '二级分类名称',
    `category1_id` string comment '一级分类编号'
) comment '二级品类表'
    partitioned by (`dt` STRING)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_base_category2_full/';

装载数据

-- 装载数据
load data inpath '/origin_data/gmall/db/base_category2_full/2020-06-14'
    into table ods_base_category2_full
    partition (dt = '2020-06-14');

查询数据

-- 查询数据
select id,
       name,
       category1_id,
       dt
from ods_base_category2_full
where dt = '2020-06-14';

ADS 层 - 图13

三级品类表

ADS 层 - 图14

-- 删除 三级品类表

drop table if exists ods_base_category3_full;

-- 创建外部 三级品类表

create external table ods_base_category3_full
(
    `id`           string comment '编号',
    `name`         string comment '三级分类名称',
    `category2_id` string comment '二级分类编号'
) comment '三级品类表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_base_category3_full/';

装载数据

-- 装载数据

load data inpath '/origin_data/gmall/db/base_category3_full/2020-06-14'
    into table ods_base_category3_full
    partition (dt = '2020-06-14');

查询数据

-- 查询数据
select id,
       name,
       category2_id,
       dt
from ods_base_category3_full
where dt = '2020-06-14';

ADS 层 - 图15

编码字典表

-- 删除 编码字典表
drop table if exists ods_base_dic_full;

-- 创建 编码字典表
create external table ods_base_dic_full
(
    `dic_code`     string comment '编号',
    `dic_name`     string comment '编码名称',
    `parent_code`  string comment '父编号',
    `create_time`  string comment '创建日期',
    `operate_time` string comment '修改日期'
) comment '编码字典表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_base_dic_full/';

装载数据

-- 装载数据
load data inpath '/origin_data/gmall/db/base_dic_full/2020-06-14'
    into table ods_base_dic_full
    partition (dt = '2020-06-14');

查询数据

-- 查询数据
select dic_code,
       dic_name,
       parent_code,
       create_time,
       operate_time,
       dt
from ods_base_dic_full
where dt = '2020-06-14';

ADS 层 - 图16

省份表

ADS 层 - 图17

-- 删除 省份表
drop table if exists ods_base_province_full;

-- 创建 省份表
create external table ods_base_province_full
(
    `id`         string comment '编号',
    `name`       string comment '省份名称',
    `region_id`  string comment '地区ID',
    `area_code`  string comment '地区编码',
    `iso_code`   string comment '旧版ISO-3166-2编码,供可视化使用',
    `iso_3166_2` string comment '新版IOS-3166-2编码,供可视化使用'
) comment '省份表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_base_province_full/';

装载数据

-- 装载数据
load data inpath '/origin_data/gmall/db/base_province_full/2020-06-14'
    into table ods_base_province_full
    partition (dt = '2020-06-14');

查询数据

-- 查询数据
select id,
       name,
       region_id,
       area_code,
       iso_code,
       iso_3166_2,
       dt
from ods_base_province_full
where dt = '2020-06-14';

ADS 层 - 图18

地区表

ADS 层 - 图19

-- 删除 地区表
drop table if exists ods_base_region_full;

-- 创建 地区表
create external table ods_base_region_full
(
    `id`          string comment '编号',
    `region_name` string comment '地区名称'
) comment ''
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_base_region_full/';

装载数据

-- 装载数据
load data inpath '/origin_data/gmall/db/base_region_full/2020-06-14'
    into table ods_base_region_full
    partition (dt = '2020-06-14');

查询数据

-- 查询数据
select id,
       region_name,
       dt
from ods_base_region_full
where dt = '2020-06-14';

ADS 层 - 图20

品牌表

ADS 层 - 图21

DROP TABLE IF EXISTS ods_base_trademark_full;

CREATE EXTERNAL TABLE ods_base_trademark_full(
    `id`       STRING COMMENT '编号',
    `tm_name`  STRING COMMENT '品牌名称',
    `logo_url` STRING COMMENT '品牌logo的图片路径'
) COMMENT '品牌表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    NULL DEFINED AS ''
    LOCATION '/warehouse/gmall/ods/ods_base_trademark_full/';

装载数据

load data inpath '/origin_data/gmall/db/base_trademark_full/2020-06-14'
    into table ods_base_trademark_full
    partition (dt = '2020-06-14');

查询数据

select id,
       tm_name,
       logo_url,
       dt
from ods_base_trademark_full
where dt = '2020-06-14';

ADS 层 - 图22

购物车表

ADS 层 - 图23

-- 删除 购物车表
drop table if exists ods_cart_info_full;

-- 创建 购物车表
create external table ods_cart_info_full
(
    `id`           string comment '编号',
    `user_id`      string comment '用户id',
    `sku_id`       string comment 'sku_id',
    `cart_price`   decimal(16, 2) comment '放入购物车时价格',
    `sku_num`      bigint comment '数量',
    `img_url`      string comment '商品图片地址',
    `sku_name`     string comment 'sku名称 (冗余)',
    `is_checked`   string comment '是否被选中',
    `create_time`  string comment '创建时间',
    `operate_time` string comment '修改时间',
    `is_ordered`   string comment '是否已经下单',
    `order_time`   string comment '下单时间',
    `source_type`  string comment '来源类型',
    `source_id`    string comment '来源编号'
) comment '购物车全量表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_cart_info_full/';

装载数据

-- 装载数据
load data inpath '/origin_data/gmall/db/cart_info_full/2020-06-14'
    into table ods_cart_info_full
    partition (dt = '2020-06-14');

查询数据

select id,
       user_id,
       sku_id,
       cart_price,
       sku_num,
       img_url,
       sku_name,
       is_checked,
       create_time,
       operate_time,
       is_ordered,
       order_time,
       source_type,
       source_id,
       dt
from ods_cart_info_full
where dt = '2020-06-14';

ADS 层 - 图24

优惠券信息表

ADS 层 - 图25

-- 删除 优惠券信息表
drop table if exists ods_coupon_info_full;

-- 创建 优惠券信息表
create external table ods_coupon_info_full
(
    `id`               string comment '购物券编号',
    `coupon_name`      string comment '购物券名称',
    `coupon_type`      string comment '购物券类型 1 现金券 2 折扣券 3 满减券 4 满件打折券',
    `condition_amount` DECIMAL(16, 2) COMMENT '满额数',
    `condition_num`    BIGINT COMMENT '满件数',
    `activity_id`      STRING COMMENT '活动编号',
    `benefit_amount`   DECIMAL(16, 2) COMMENT '减金额',
    `benefit_discount` DECIMAL(16, 2) COMMENT '折扣',
    `create_time`      STRING COMMENT '创建时间',
    `range_type`       STRING COMMENT '范围类型 1、商品 2、品类 3、品牌',
    `limit_num`        BIGINT COMMENT '最多领用次数',
    `taken_count`      BIGINT COMMENT '已领用次数',
    `start_time`       STRING COMMENT '开始领取时间',
    `end_time`         STRING COMMENT '结束领取时间',
    `operate_time`     STRING COMMENT '修改时间',
    `expire_time`      STRING COMMENT '过期时间'
) comment '优惠券信息表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_coupon_info_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/coupon_info_full/2020-06-14'
    into table ods_coupon_info_full
    partition (dt = '2020-06-14');
-- 查询数据
select id,
       coupon_name,
       coupon_type,
       condition_amount,
       condition_num,
       activity_id,
       benefit_amount,
       benefit_discount,
       create_time,
       range_type,
       limit_num,
       taken_count,
       start_time,
       end_time,
       operate_time,
       expire_time,
       dt
from ods_coupon_info_full
where dt = '2020-06-14';

ADS 层 - 图26

商品平台属性表

ADS 层 - 图27

创建 商品平台属性表

-- 删除 商品平台属性表
drop table if exists ods_sku_attr_value_full;

-- 创建 商品平台属性表
create external table ods_sku_attr_value_full
(
    `id`          string comment '编号',
    `attr_id`     string comment '平台属性ID',
    `value_id`    string comment '平台属性值ID',
    `sku_id`      string comment '商品ID',
    `attr_name`   string comment '平台属性名称',
    `values_name` string comment '平台属性值名称'
) comment 'sku平台属性表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_sku_attr_value_full/';

装载数据

-- 装载数据
load data inpath '/origin_data/gmall/db/sku_attr_value_full/2020-06-14'
    into table ods_sku_attr_value_full
    partition (dt = '2020-06-14');

查询数据

-- 查询数据
select id,
       attr_id,
       value_id,
       sku_id,
       attr_name,
       values_name,
       dt
from ods_sku_attr_value_full
where dt = '2020-06-14';

ADS 层 - 图28

商品表

ADS 层 - 图29

-- 删除 商品表
drop table if exists ods_sku_info_full;

-- 创建 商品表
create external table ods_sku_info_full
(
    `id`              string comment 'skuId',
    `spu_id`          string comment 'spuid',
    `price`           decimal(16, 2) comment '价格',
    `sku_name`        string comment '商品名称',
    `sku_desc`        string comment '商品描述',
    `weight`          decimal(16, 2) comment '重量',
    `tm_id`           string comment '品牌id',
    `category3_id`    string comment '品类id',
    `sku_default_igm` string comment '商品图片地址',
    `is_sale`         string comment '是否在售',
    `create_time`     string comment '创建时间'
) comment 'SKU商品表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_sku_info_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/sku_info_full/2020-06-14'
    into table ods_sku_info_full
    partition (dt = '2020-06-14');
-- 查询数据
select id,
       spu_id,
       price,
       sku_name,
       sku_desc,
       weight,
       tm_id,
       category3_id,
       sku_default_igm,
       is_sale,
       create_time,
       dt
from ods_sku_info_full
where dt = '2020-06-14';

ADS 层 - 图30

商品销售属性值表

ADS 层 - 图31

-- 删除 商品销售属性值表
DROP TABLE IF EXISTS ods_sku_sale_attr_value_full;

-- 创建 商品销售属性值表
CREATE EXTERNAL TABLE ods_sku_sale_attr_value_full(
    `id`                   STRING COMMENT '编号',
    `sku_id`               STRING COMMENT 'sku_id',
    `spu_id`               STRING COMMENT 'spu_id',
    `sale_attr_value_id`   STRING COMMENT '销售属性值id',
    `sale_attr_id`         STRING COMMENT '销售属性id',
    `sale_attr_name`       STRING COMMENT '销售属性名称',
    `sale_attr_value_name` STRING COMMENT '销售属性值名称'
) COMMENT 'sku销售属性名称'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    NULL DEFINED AS ''
    LOCATION '/warehouse/gmall/ods/ods_sku_sale_attr_value_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/sku_sale_attr_value_full/2020-06-14'
    into table ods_sku_sale_attr_value_full
    partition (dt = '2020-06-14');
-- 查询数据
select id,
       sku_id,
       spu_id,
       sale_attr_value_id,
       sale_attr_id,
       sale_attr_name,
       sale_attr_value_name,
       dt
from ods_sku_sale_attr_value_full
where dt = '2020-06-14';

ADS 层 - 图32

SPU表

ADS 层 - 图33

-- 删除 SPU表
drop table if exists ods_spu_info_full;

-- 创建 SPU表
create external table ods_spu_info_full
(
    `id`           STRING COMMENT 'spu_id',
    `spu_name`     STRING COMMENT 'spu名称',
    `description`  STRING COMMENT '描述信息',
    `category3_id` STRING COMMENT '品类id',
    `tm_id`        STRING COMMENT '品牌id'
) comment 'SPU商品表'
    partitioned by (`dt` string)
    row format delimited fields terminated by '\t'
        null defined as ''
    location '/warehouse/gmall/ods/ods_spu_info_full/';
-- 加载数据
load data inpath '/origin_data/gmall/db/spu_info_full/2020-06-14'
    into table ods_spu_info_full
    partition (dt = '2020-06-14');
-- 查询
select id,
       spu_name,
       description,
       category3_id,
       tm_id,
       dt
from ods_spu_info_full
where dt = '2020-06-14';

ADS 层 - 图34

增量表

电商业务表 :

ADS 层 - 图35

购物车表

ADS 层 - 图36

hadoop fs -cat /origin_data/gmall/db/cart_info_inc/2020-06-14 | zcat

ADS 层 - 图37

sql :

-- 删除购物车表
drop table if exists ods_cart_info_inc;

-- 创建 购物车表
create external table ods_cart_info_inc
(
    `type` string comment '变动类型',
    `ts`   bigint comment '变动时间',
    `data` struct<id :string, user_id :string,sku_id :string, cart_price :decimal(16, 2), sku_num :bigint,img_url
                  :string,sku_name :string,is_checked :string,create_time :string, operate_time :string,is_ordered
                  :string,order_time :string,source_type :string,source_id :string> comment '数据',
    `old`  map<string,string> comment '旧值'
) comment '购物车增量表'
    partitioned by (`dt` string)
    row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
    location '/warehouse/gmall/ods/ods_cart_info_inc/';
-- 转载数据
load data inpath '/origin_data/gmall/db/cart_info_inc/2020-06-14'
    overwrite into table ods_cart_info_inc
    partition (dt = '2020-06-14');
-- 查询数据
set hive.execution.engine = spark;
select type,
       ts,
       data.id,
       data.user_id,
       data.sku_id,
       data.create_time,
       data.source_id,
       data.source_type,
       data.sku_num,
       old['id'] as old_id,
       old['user_id'] as old_user_id,
       old['sku_id'] as old_sku_id,
       old['create_time'] as old_sku_id,
       old['source_id'] as old_source_id,
       old['source_type'] as old_source_type,
       old['sku_num'] as old_sku_num,
       dt
from ods_cart_info_inc
where dt = '2020-06-14'
limit 100;

ADS 层 - 图38

评论表

ADS 层 - 图39

-- 删除评论表
drop table if exists ods_comment_info_inc;

-- 创建 评论表
create external table ods_comment_info_inc
(
    `type` string comment '变动类型',
    `ts`   bigint comment '变动时间',
    `data` struct<id :string, user_id :string, nick_name :string, head_img :string, sku_id :string,spu_id :string,
                  order_id :string, appraise :string, comment_txt :string, create_time :string, operate_time
                  :string> comment '数据',
    `old`  map<string,string> comment '旧值'
) comment '评论表'
    partitioned by (`dt` string)
    row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
    location '/warehouse/gmall/ods/ods_comment_info_inc/';
-- 装载数据
load data inpath '/origin_data/gmall/db/comment_info_inc/2020-06-14'
    into table ods_comment_info_inc
    partition (dt = '2020-06-14');
-- 查询数据
select type,
       ts,
       data.id,
       data.user_id,
       data.nick_name,
       data.head_img,
       data.sku_id,
       data.spu_id,
       data.order_id,
       data.appraise,
       data.comment_txt,
       data.create_time,
       data.operate_time
       old,
       dt
from ods_comment_info_inc
where dt = '2020-06-14';

优惠券领用表

ADS 层 - 图40

-- 删除 优惠券领用表
drop table if exists ods_coupon_use_inc;

-- 创建 优惠券领用表
create external table ods_coupon_use_inc
(
    `type` string comment '变动类型',
    `ts`   bigint comment '变动时间',
    `data` struct<id :string, coupon_id :string, user_id :string, order_id :string, coupon_status :string, get_time
                  :string, using_time :string, used_time :string, expire_time :string> comment '数据',
    `old`  map<string,string> comment '旧值'
) comment '优惠券领用表'
    partitioned by (`dt` string)
    row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
    location '/warehouse/gmall/ods/ods_coupon_use_inc/';
-- 转载数据
load data inpath '/origin_data/gmall/db/coupon_use_inc/2020-06-14'
    into table ods_coupon_use_inc
    partition (dt = '2020-06-14');
-- 查询数据
select type,
       ts,
       data.id,
       data.coupon_id,
       data.user_id,
       data.order_id,
       data.coupon_status,
       data.get_time,
       data.using_time,
       data.used_time,
       data.expire_time,
       old,
       dt
from ods_coupon_use_inc
where dt = '2020-06-14';

收藏表

ADS 层 - 图41

-- 删除 收藏表
drop table if exists ods_favor_info_inc;

-- 创建 收藏表
create external table ods_favor_info_inc
(
    `type` string comment '变动类型',
    `ts`   bigint comment '变动时间',
    `data` struct<id :string, user_id :string, sku_id :string, spu_id :string, is_cancel :string, create_time :string,
                  cancel_time :string> comment '数据',
    `old`  map<string,string> comment '旧值'
) comment '收藏表'
    partitioned by (`dt` string)
    row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
    location '/warehouse/gmall/ods/ods_favor_info_inc/';
-- 装载数据
load data inpath '/origin_data/gmall/db/favor_info_inc/2020-06-14'
    into table ods_favor_info_inc
    partition (dt = '2020-06-14');
-- 查询数据
select type,
       ts,
       data,
       old,
       dt
from ods_favor_info_inc
where dt = '2020-06-14';

订单明细表

ADS 层 - 图42

DROP TABLE IF EXISTS ods_order_detail_inc;

CREATE EXTERNAL TABLE ods_order_detail_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id:STRING, order_id:STRING, sku_id:STRING, sku_name:STRING, img_url:STRING, order_price:DECIMAL(16, 2), sku_num :BIGINT, create_time:STRING, source_type:STRING, source_id:STRING, split_total_amount:DECIMAL(16, 2), split_activity_amount:DECIMAL(16, 2), split_coupon_amount:DECIMAL(16, 2)> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单明细表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_order_detail_inc/';

订单明细活动关联表

ADS 层 - 图43

DROP TABLE IF EXISTS ods_order_detail_activity_inc;

CREATE EXTERNAL TABLE ods_order_detail_activity_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id:STRING, order_id:STRING, order_detail_id:STRING, activity_id:STRING, activity_rule_id:STRING, sku_id:STRING, create_time:STRING> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单明细活动关联表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_order_detail_activity_inc/';

订单明细优惠券关联表

ADS 层 - 图44

DROP TABLE IF EXISTS ods_order_detail_coupon_inc;

CREATE EXTERNAL TABLE ods_order_detail_coupon_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id:STRING, order_id:STRING, order_detail_id:STRING, coupon_id:STRING, coupon_use_id:STRING, sku_id:STRING, create_time:STRING> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单明细优惠券关联表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_order_detail_coupon_inc/';

订单表

ADS 层 - 图45

DROP TABLE IF EXISTS ods_order_info_inc;

CREATE EXTERNAL TABLE ods_order_info_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id:STRING, consignee:STRING, consignee_tel :STRING, total_amount:DECIMAL(16, 2), order_status:STRING, user_id:STRING, payment_way :STRING,delivery_address :STRING, order_comment :STRING, out_trade_no :STRING, trade_body:STRING, create_time :STRING,operate_time :STRING, expire_time :STRING, process_status :STRING, tracking_no:STRING, parent_order_id :STRING, img_url :STRING, province_id :STRING, activity_reduce_amount:DECIMAL(16, 2), coupon_reduce_amount :DECIMAL(16, 2), original_total_amount :DECIMAL(16, 2), freight_fee:DECIMAL(16, 2), freight_fee_reduce :DECIMAL(16, 2), refundable_time :DECIMAL(16, 2)> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_order_info_inc/';

退单表

ADS 层 - 图46

DROP TABLE IF EXISTS ods_order_refund_info_inc;

CREATE EXTERNAL TABLE ods_order_refund_info_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id :STRING,user_id :STRING,order_id :STRING,sku_id :STRING,refund_type :STRING,refund_num :BIGINT,refund_amount:DECIMAL(16, 2),refund_reason_type :STRING,refund_reason_txt :STRING,refund_status :STRING,create_time:STRING> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '退单表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_order_refund_info_inc/';

订单状态流水表

ADS 层 - 图47

DROP TABLE IF EXISTS ods_order_status_log_inc;

CREATE EXTERNAL TABLE ods_order_status_log_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id :STRING,order_id :STRING,order_status :STRING,operate_time :STRING> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '退单表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_order_status_log_inc/';

支付表

ADS 层 - 图48

DROP TABLE IF EXISTS ods_payment_info_inc;

CREATE EXTERNAL TABLE ods_payment_info_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id :STRING,out_trade_no :STRING,order_id :STRING,user_id :STRING,payment_type :STRING,trade_no:STRING,total_amount :DECIMAL(16, 2),subject :STRING,payment_status :STRING,create_time :STRING,callback_time:STRING,callback_content :STRING> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '支付表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_payment_info_inc/';

退款表

ADS 层 - 图49

DROP TABLE IF EXISTS ods_refund_payment_inc;

CREATE EXTERNAL TABLE ods_refund_payment_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id :STRING,out_trade_no :STRING,order_id :STRING,sku_id :STRING,payment_type :STRING,trade_no :STRING,total_amount:DECIMAL(16, 2),subject :STRING,refund_status :STRING,create_time :STRING,callback_time :STRING,callback_content:STRING> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '退款表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_refund_payment_inc/';

用户表

ADS 层 - 图50

DROP TABLE IF EXISTS ods_user_info_inc;

CREATE EXTERNAL TABLE ods_user_info_inc(
    `type` STRING COMMENT '变动类型',
    `ts`   BIGINT COMMENT '变动时间',
    `data` STRUCT<id :STRING,login_name :STRING,nick_name :STRING,passwd :STRING,name :STRING,phone_num :STRING,email:STRING,head_img :STRING,user_level :STRING,birthday :STRING,gender :STRING,create_time :STRING,operate_time:STRING,status :STRING> COMMENT '数据',
    `old`  MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '用户表'
    PARTITIONED BY (`dt` STRING)
    ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
    LOCATION '/warehouse/gmall/ods/ods_user_info_inc/';

数据装载脚本

在 cpu101 的 /home/cpu/bin 目录下创建 hdfs_to_ods_db.sh

vim hdfs_to_ods_db.sh
#!/bin/bash

APP=gmall

if [ -n "$2" ] ;then
   do_date=$2
else 
   do_date=`date -d '-1 day' +%F`
fi

load_data(){
    sql=""
    for i in $*; do
        #判断路径是否存在
        hadoop fs -test -e /origin_data/$APP/db/${i:4}/$do_date
        #路径存在方可装载数据
        if [[ $? = 0 ]]; then
            sql=$sql"load data inpath '/origin_data/$APP/db/${i:4}/$do_date' OVERWRITE into table ${APP}.$i partition(dt='$do_date');"
        fi
    done
    hive -e "$sql"
}

case $1 in
    "ods_activity_info_full")
        load_data "ods_activity_info_full"
    ;;
    "ods_activity_rule_full")
        load_data "ods_activity_rule_full"
    ;;
    "ods_base_category1_full")
        load_data "ods_base_category1_full"
    ;;
    "ods_base_category2_full")
        load_data "ods_base_category2_full"
    ;;
    "ods_base_category3_full")
        load_data "ods_base_category3_full"
    ;;
    "ods_base_dic_full")
        load_data "ods_base_dic_full"
    ;;
    "ods_base_province_full")
        load_data "ods_base_province_full"
    ;;
    "ods_base_region_full")
        load_data "ods_base_region_full"
    ;;
    "ods_base_trademark_full")
        load_data "ods_base_trademark_full"
    ;;
    "ods_cart_info_full")
        load_data "ods_cart_info_full"
    ;;
    "ods_coupon_info_full")
        load_data "ods_coupon_info_full"
    ;;
    "ods_sku_attr_value_full")
        load_data "ods_sku_attr_value_full"
    ;;
    "ods_sku_info_full")
        load_data "ods_sku_info_full"
    ;;
    "ods_sku_sale_attr_value_full")
        load_data "ods_sku_sale_attr_value_full"
    ;;
    "ods_spu_info_full")
        load_data "ods_spu_info_full"
    ;;

    "ods_cart_info_inc")
        load_data "ods_cart_info_inc"
    ;;
    "ods_comment_info_inc")
        load_data "ods_comment_info_inc"
    ;;
    "ods_coupon_use_inc")
        load_data "ods_coupon_use_inc"
    ;;
    "ods_favor_info_inc")
        load_data "ods_favor_info_inc"
    ;;
    "ods_order_detail_inc")
        load_data "ods_order_detail_inc"
    ;;
    "ods_order_detail_activity_inc")
        load_data "ods_order_detail_activity_inc"
    ;;
    "ods_order_detail_coupon_inc")
        load_data "ods_order_detail_coupon_inc"
    ;;
    "ods_order_info_inc")
        load_data "ods_order_info_inc"
    ;;
    "ods_order_refund_info_inc")
        load_data "ods_order_refund_info_inc"
    ;;
    "ods_order_status_log_inc")
        load_data "ods_order_status_log_inc"
    ;;
    "ods_payment_info_inc")
        load_data "ods_payment_info_inc"
    ;;
    "ods_refund_payment_inc")
        load_data "ods_refund_payment_inc"
    ;;
    "ods_user_info_inc")
        load_data "ods_user_info_inc"
    ;;

    "all")
        load_data "ods_activity_info_full" "ods_activity_rule_full" "ods_base_category1_full" "ods_base_category2_full" "ods_base_category3_full" "ods_base_dic_full" "ods_base_province_full" "ods_base_region_full" "ods_base_trademark_full" "ods_cart_info_full" "ods_coupon_info_full" "ods_sku_attr_value_full" "ods_sku_info_full" "ods_sku_sale_attr_value_full" "ods_spu_info_full" "ods_cart_info_inc" "ods_comment_info_inc" "ods_coupon_use_inc" "ods_favor_info_inc" "ods_order_detail_inc" "ods_order_detail_activity_inc" "ods_order_detail_coupon_inc" "ods_order_info_inc" "ods_order_refund_info_inc" "ods_order_status_log_inc" "ods_payment_info_inc" "ods_refund_payment_inc" "ods_user_info_inc"
    ;;
esac

ADS 层 - 图51

增加脚本执行权限

chmod 777 hdfs_to_ods_db.sh

ADS 层 - 图52

脚本用法 :

hdfs_to_ods_db.sh all 2020-06-14

ADS 层 - 图53