ODS 层的设计要点如下:
- ODS 层的表结构设计受业务系统同步过来的数据结构
- ODS 层保存全部历史数据,所以压缩格式选择压缩高 ( gzip )
- ODS 层表名的命名规范:ods表名单分区增量全量标识( inc / full )
日志表
日志数据格式
hadoop fs -cat /origin_data/gmall/log/topic_log/2020-06-14/* | zcat

{"common": { -- 环境信息"ar": "440000", -- 地区编码"ba": "Xiaomi", -- 手机品牌"ch": "vivo", -- 渠道"is_new": "1", -- 是否首日使用,首次使用的当日,该字段值为1,过了24:00,该字段置为0"md": "Xiaomi 9", -- 手机型号"mid": "mid_625416", -- 设备id"os": "Android 11.0", -- 操作系统"uid": "209", -- 会员id"vc": "v2.1.111" -- app版本号},"page": { -- 页面信息"during_time": 2815, -- 持续时间毫秒"item": "28", -- 目标id"item_type": "sku_id", -- 目标类型"last_page_id": "register", -- 上页类型"page_id": "good_detail", -- 页面ID"source_type": "promotion" -- 来源类型},"actions": [ -- 动作(事件){"action_id": "get_coupon", -- 动作id"item": "2", -- 目标id"item_type": "coupon_id", -- 目标类型"ts": 1592144742407 -- 动作时间戳}],"displays": [ -- 曝光信息{"display_type": "promotion", -- 曝光类型"item": "32", -- 曝光对象id"item_type": "sku_id", -- 曝光对象类型"order": 1, -- 出现顺序"pos_id": 1 -- 曝光位置}],"start": { -- 启动日志"entry": "icon", -- icon 手机图标 notice 通知 install 安装后启动"loading_time": 18803, -- 启动加载时间"open_ad_id": 7, -- 广告页ID"open_ad_ms": 3449, -- 广告总共播放时间"open_ad_skip_ms": 1989 -- 用户跳过广告时点},"err": { -- 错误"error_code": "1234", -- 错误码"msg": "***********" -- 错误信息},"ts": 1592144741000 -- 跳入时间戳}
建表数据
-- 删除表
DROP TABLE IF EXISTS ods_log_inc;
-- 创建外部表
CREATE EXTERNAL TABLE ods_log_inc
(
`common` STRUCT<ar :STRING, ba :STRING, ch :STRING, is_new :STRING, md :STRING, mid :STRING, os :STRING, uid
:STRING, vc :STRING> COMMENT '公共信息',
`page` STRUCT<during_time :BIGINT, item :STRING, item_type :STRING, last_page_id :STRING, page_id :STRING,
source_type :STRING> COMMENT '页面信息',
`actions` ARRAY<STRUCT<action_id:STRING, item:STRING, item_type:STRING, ts:BIGINT>> COMMENT '动作信息',
`displays` ARRAY<STRUCT<display_type :STRING, item :STRING, item_type :STRING, `order` :STRING, pos_id
:STRING>> COMMENT '曝光信息',
`start` STRUCT<entry :STRING, loading_time :BIGINT, open_ad_id :BIGINT, open_ad_ms :BIGINT, open_ad_skip_ms
:BIGINT> COMMENT '启动信息',
`err` STRUCT<error_code:BIGINT, msg:STRING> COMMENT '错误信息',
`ts` BIGINT COMMENT '时间戳'
) COMMENT '活动信息表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_log_inc/';
数据装载
load data inpath '/origin_data/gmall/log/topic_log/2020-06-14'
into table ods_log_inc
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select common,
page,
actions,
displays,
`start`,
err,
ts,
dt
from ods_log_inc
where dt = '2020-06-14';

每日数据装载脚本
在 cpu101 的 /home/cpu/bin 目录下创建 hdfs_to_ods_log.sh
vim hdfs_to_ods_log.sh
内容 :
#!/bin/bash
# 定义变量方便修改
APP=gmall
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ]; then
do_date=$1
else
do_date=`date -d "-l day" +%F`
fi
echo ================== 日志日期为 $do_date ==================
sql="load data inpath '/origin_data/$APP/log/topic_log/$do_date'
into table ${APP}.ods_log_inc partition(dt='$do_date');"
hive -e "$sql"

增加脚本执行权限
chmod 777 hdfs_to_ods_log.sh

脚本用法 :
hdfs_to_ods_log.sh 2020-06-14
业务表
后台管理系统 :

全量表
活动信息表

-- 删除 活动信息表
DROP TABLE IF EXISTS ods_activity_info_full;
-- 创建外部 活动信息表
CREATE EXTERNAL TABLE ods_activity_info_full(
`id` STRING COMMENT '活动id',
`activity_name` STRING COMMENT '活动名称',
`activity_type` STRING COMMENT '活动类型',
`activity_desc` STRING COMMENT '活动描述',
`start_time` STRING COMMENT '开始时间',
`end_time` STRING COMMENT '结束时间',
`create_time` STRING COMMENT '创建时间'
) COMMENT '活动信息表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/gmall/ods/ods_activity_info_full/';
装载该分区数据
-- 装载数据
load data inpath '/origin_data/gmall/db/activity_info_full/2020-06-14'
into table ods_activity_info_full
partition (dt = '2020-06-14');
查询活动信息表
-- 查询 活动信息表
select id,
activity_name,
activity_type,
activity_desc,
start_time,
end_time,
create_time,
dt
from ods_activity_info_full
where dt = '2020-06-14';

活动规则表

-- 删除 活动规则表
DROP TABLE IF EXISTS ods_activity_rule_full;
-- 创建外部 活动规则表
CREATE EXTERNAL TABLE ods_activity_rule_full(
`id` STRING COMMENT '编号',
`activity_id` STRING COMMENT '类型',
`activity_type` STRING COMMENT '活动类型',
`condition_amount` DECIMAL(16, 2) COMMENT '满减金额',
`condition_num` BIGINT COMMENT '满减件数',
`benefit_amount` DECIMAL(16, 2) COMMENT '优惠金额',
`benefit_discount` DECIMAL(16, 2) COMMENT '优惠折扣',
`benefit_level` STRING COMMENT '优惠级别'
) COMMENT '活动规则表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/gmall/ods/ods_activity_rule_full/';
装载该分区数据
-- 装载数据
load data inpath '/origin_data/gmall/db/activity_rule_full/2020-06-14'
into table ods_activity_rule_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
activity_id,
activity_type,
condition_amount,
condition_num,
benefit_amount,
benefit_discount,
benefit_level,
dt
from ods_activity_rule_full
where dt = '2020-06-14';

一级品类表

-- 删除 一级品类表
drop table if exists ods_base_category1_full;
-- 创建 一级品类表
create external table ods_base_category1_full (
`id` string comment '编号',
`name` string comment '分类名称'
) comment '一级品类表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_category1_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_category1_full/2020-06-14'
into table ods_base_category1_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
name,
dt
from ods_base_category1_full
where dt = '2020-06-14';

二级品类表

-- 删除 二级品类表
drop table if exists ods_base_category2_full;
-- 创建 二级品类表
create external table ods_base_category2_full
(
`id` string comment '编号',
`name` string comment '二级分类名称',
`category1_id` string comment '一级分类编号'
) comment '二级品类表'
partitioned by (`dt` STRING)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_category2_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_category2_full/2020-06-14'
into table ods_base_category2_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
name,
category1_id,
dt
from ods_base_category2_full
where dt = '2020-06-14';

三级品类表

-- 删除 三级品类表
drop table if exists ods_base_category3_full;
-- 创建外部 三级品类表
create external table ods_base_category3_full
(
`id` string comment '编号',
`name` string comment '三级分类名称',
`category2_id` string comment '二级分类编号'
) comment '三级品类表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_category3_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_category3_full/2020-06-14'
into table ods_base_category3_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
name,
category2_id,
dt
from ods_base_category3_full
where dt = '2020-06-14';

编码字典表
-- 删除 编码字典表
drop table if exists ods_base_dic_full;
-- 创建 编码字典表
create external table ods_base_dic_full
(
`dic_code` string comment '编号',
`dic_name` string comment '编码名称',
`parent_code` string comment '父编号',
`create_time` string comment '创建日期',
`operate_time` string comment '修改日期'
) comment '编码字典表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_dic_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_dic_full/2020-06-14'
into table ods_base_dic_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select dic_code,
dic_name,
parent_code,
create_time,
operate_time,
dt
from ods_base_dic_full
where dt = '2020-06-14';

省份表

-- 删除 省份表
drop table if exists ods_base_province_full;
-- 创建 省份表
create external table ods_base_province_full
(
`id` string comment '编号',
`name` string comment '省份名称',
`region_id` string comment '地区ID',
`area_code` string comment '地区编码',
`iso_code` string comment '旧版ISO-3166-2编码,供可视化使用',
`iso_3166_2` string comment '新版IOS-3166-2编码,供可视化使用'
) comment '省份表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_province_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_province_full/2020-06-14'
into table ods_base_province_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
name,
region_id,
area_code,
iso_code,
iso_3166_2,
dt
from ods_base_province_full
where dt = '2020-06-14';

地区表

-- 删除 地区表
drop table if exists ods_base_region_full;
-- 创建 地区表
create external table ods_base_region_full
(
`id` string comment '编号',
`region_name` string comment '地区名称'
) comment ''
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_region_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_region_full/2020-06-14'
into table ods_base_region_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
region_name,
dt
from ods_base_region_full
where dt = '2020-06-14';

品牌表

DROP TABLE IF EXISTS ods_base_trademark_full;
CREATE EXTERNAL TABLE ods_base_trademark_full(
`id` STRING COMMENT '编号',
`tm_name` STRING COMMENT '品牌名称',
`logo_url` STRING COMMENT '品牌logo的图片路径'
) COMMENT '品牌表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/gmall/ods/ods_base_trademark_full/';
装载数据
load data inpath '/origin_data/gmall/db/base_trademark_full/2020-06-14'
into table ods_base_trademark_full
partition (dt = '2020-06-14');
查询数据
select id,
tm_name,
logo_url,
dt
from ods_base_trademark_full
where dt = '2020-06-14';

购物车表

-- 删除 购物车表
drop table if exists ods_cart_info_full;
-- 创建 购物车表
create external table ods_cart_info_full
(
`id` string comment '编号',
`user_id` string comment '用户id',
`sku_id` string comment 'sku_id',
`cart_price` decimal(16, 2) comment '放入购物车时价格',
`sku_num` bigint comment '数量',
`img_url` string comment '商品图片地址',
`sku_name` string comment 'sku名称 (冗余)',
`is_checked` string comment '是否被选中',
`create_time` string comment '创建时间',
`operate_time` string comment '修改时间',
`is_ordered` string comment '是否已经下单',
`order_time` string comment '下单时间',
`source_type` string comment '来源类型',
`source_id` string comment '来源编号'
) comment '购物车全量表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_cart_info_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/cart_info_full/2020-06-14'
into table ods_cart_info_full
partition (dt = '2020-06-14');
查询数据
select id,
user_id,
sku_id,
cart_price,
sku_num,
img_url,
sku_name,
is_checked,
create_time,
operate_time,
is_ordered,
order_time,
source_type,
source_id,
dt
from ods_cart_info_full
where dt = '2020-06-14';

优惠券信息表

-- 删除 优惠券信息表
drop table if exists ods_coupon_info_full;
-- 创建 优惠券信息表
create external table ods_coupon_info_full
(
`id` string comment '购物券编号',
`coupon_name` string comment '购物券名称',
`coupon_type` string comment '购物券类型 1 现金券 2 折扣券 3 满减券 4 满件打折券',
`condition_amount` DECIMAL(16, 2) COMMENT '满额数',
`condition_num` BIGINT COMMENT '满件数',
`activity_id` STRING COMMENT '活动编号',
`benefit_amount` DECIMAL(16, 2) COMMENT '减金额',
`benefit_discount` DECIMAL(16, 2) COMMENT '折扣',
`create_time` STRING COMMENT '创建时间',
`range_type` STRING COMMENT '范围类型 1、商品 2、品类 3、品牌',
`limit_num` BIGINT COMMENT '最多领用次数',
`taken_count` BIGINT COMMENT '已领用次数',
`start_time` STRING COMMENT '开始领取时间',
`end_time` STRING COMMENT '结束领取时间',
`operate_time` STRING COMMENT '修改时间',
`expire_time` STRING COMMENT '过期时间'
) comment '优惠券信息表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_coupon_info_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/coupon_info_full/2020-06-14'
into table ods_coupon_info_full
partition (dt = '2020-06-14');
-- 查询数据
select id,
coupon_name,
coupon_type,
condition_amount,
condition_num,
activity_id,
benefit_amount,
benefit_discount,
create_time,
range_type,
limit_num,
taken_count,
start_time,
end_time,
operate_time,
expire_time,
dt
from ods_coupon_info_full
where dt = '2020-06-14';

商品平台属性表

创建 商品平台属性表
-- 删除 商品平台属性表
drop table if exists ods_sku_attr_value_full;
-- 创建 商品平台属性表
create external table ods_sku_attr_value_full
(
`id` string comment '编号',
`attr_id` string comment '平台属性ID',
`value_id` string comment '平台属性值ID',
`sku_id` string comment '商品ID',
`attr_name` string comment '平台属性名称',
`values_name` string comment '平台属性值名称'
) comment 'sku平台属性表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_sku_attr_value_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/sku_attr_value_full/2020-06-14'
into table ods_sku_attr_value_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
attr_id,
value_id,
sku_id,
attr_name,
values_name,
dt
from ods_sku_attr_value_full
where dt = '2020-06-14';

商品表

-- 删除 商品表
drop table if exists ods_sku_info_full;
-- 创建 商品表
create external table ods_sku_info_full
(
`id` string comment 'skuId',
`spu_id` string comment 'spuid',
`price` decimal(16, 2) comment '价格',
`sku_name` string comment '商品名称',
`sku_desc` string comment '商品描述',
`weight` decimal(16, 2) comment '重量',
`tm_id` string comment '品牌id',
`category3_id` string comment '品类id',
`sku_default_igm` string comment '商品图片地址',
`is_sale` string comment '是否在售',
`create_time` string comment '创建时间'
) comment 'SKU商品表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_sku_info_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/sku_info_full/2020-06-14'
into table ods_sku_info_full
partition (dt = '2020-06-14');
-- 查询数据
select id,
spu_id,
price,
sku_name,
sku_desc,
weight,
tm_id,
category3_id,
sku_default_igm,
is_sale,
create_time,
dt
from ods_sku_info_full
where dt = '2020-06-14';

商品销售属性值表

-- 删除 商品销售属性值表
DROP TABLE IF EXISTS ods_sku_sale_attr_value_full;
-- 创建 商品销售属性值表
CREATE EXTERNAL TABLE ods_sku_sale_attr_value_full(
`id` STRING COMMENT '编号',
`sku_id` STRING COMMENT 'sku_id',
`spu_id` STRING COMMENT 'spu_id',
`sale_attr_value_id` STRING COMMENT '销售属性值id',
`sale_attr_id` STRING COMMENT '销售属性id',
`sale_attr_name` STRING COMMENT '销售属性名称',
`sale_attr_value_name` STRING COMMENT '销售属性值名称'
) COMMENT 'sku销售属性名称'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/gmall/ods/ods_sku_sale_attr_value_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/sku_sale_attr_value_full/2020-06-14'
into table ods_sku_sale_attr_value_full
partition (dt = '2020-06-14');
-- 查询数据
select id,
sku_id,
spu_id,
sale_attr_value_id,
sale_attr_id,
sale_attr_name,
sale_attr_value_name,
dt
from ods_sku_sale_attr_value_full
where dt = '2020-06-14';

SPU表

-- 删除 SPU表
drop table if exists ods_spu_info_full;
-- 创建 SPU表
create external table ods_spu_info_full
(
`id` STRING COMMENT 'spu_id',
`spu_name` STRING COMMENT 'spu名称',
`description` STRING COMMENT '描述信息',
`category3_id` STRING COMMENT '品类id',
`tm_id` STRING COMMENT '品牌id'
) comment 'SPU商品表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_spu_info_full/';
-- 加载数据
load data inpath '/origin_data/gmall/db/spu_info_full/2020-06-14'
into table ods_spu_info_full
partition (dt = '2020-06-14');
-- 查询
select id,
spu_name,
description,
category3_id,
tm_id,
dt
from ods_spu_info_full
where dt = '2020-06-14';

增量表
电商业务表 :

购物车表

hadoop fs -cat /origin_data/gmall/db/cart_info_inc/2020-06-14 | zcat

sql :
-- 删除购物车表
drop table if exists ods_cart_info_inc;
-- 创建 购物车表
create external table ods_cart_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, user_id :string,sku_id :string, cart_price :decimal(16, 2), sku_num :bigint,img_url
:string,sku_name :string,is_checked :string,create_time :string, operate_time :string,is_ordered
:string,order_time :string,source_type :string,source_id :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '购物车增量表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_cart_info_inc/';
-- 转载数据
load data inpath '/origin_data/gmall/db/cart_info_inc/2020-06-14'
overwrite into table ods_cart_info_inc
partition (dt = '2020-06-14');
-- 查询数据
set hive.execution.engine = spark;
select type,
ts,
data.id,
data.user_id,
data.sku_id,
data.create_time,
data.source_id,
data.source_type,
data.sku_num,
old['id'] as old_id,
old['user_id'] as old_user_id,
old['sku_id'] as old_sku_id,
old['create_time'] as old_sku_id,
old['source_id'] as old_source_id,
old['source_type'] as old_source_type,
old['sku_num'] as old_sku_num,
dt
from ods_cart_info_inc
where dt = '2020-06-14'
limit 100;

评论表

-- 删除评论表
drop table if exists ods_comment_info_inc;
-- 创建 评论表
create external table ods_comment_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, user_id :string, nick_name :string, head_img :string, sku_id :string,spu_id :string,
order_id :string, appraise :string, comment_txt :string, create_time :string, operate_time
:string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '评论表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_comment_info_inc/';
-- 装载数据
load data inpath '/origin_data/gmall/db/comment_info_inc/2020-06-14'
into table ods_comment_info_inc
partition (dt = '2020-06-14');
-- 查询数据
select type,
ts,
data.id,
data.user_id,
data.nick_name,
data.head_img,
data.sku_id,
data.spu_id,
data.order_id,
data.appraise,
data.comment_txt,
data.create_time,
data.operate_time
old,
dt
from ods_comment_info_inc
where dt = '2020-06-14';
优惠券领用表

-- 删除 优惠券领用表
drop table if exists ods_coupon_use_inc;
-- 创建 优惠券领用表
create external table ods_coupon_use_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, coupon_id :string, user_id :string, order_id :string, coupon_status :string, get_time
:string, using_time :string, used_time :string, expire_time :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '优惠券领用表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_coupon_use_inc/';
-- 转载数据
load data inpath '/origin_data/gmall/db/coupon_use_inc/2020-06-14'
into table ods_coupon_use_inc
partition (dt = '2020-06-14');
-- 查询数据
select type,
ts,
data.id,
data.coupon_id,
data.user_id,
data.order_id,
data.coupon_status,
data.get_time,
data.using_time,
data.used_time,
data.expire_time,
old,
dt
from ods_coupon_use_inc
where dt = '2020-06-14';
收藏表

-- 删除 收藏表
drop table if exists ods_favor_info_inc;
-- 创建 收藏表
create external table ods_favor_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, user_id :string, sku_id :string, spu_id :string, is_cancel :string, create_time :string,
cancel_time :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '收藏表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_favor_info_inc/';
-- 装载数据
load data inpath '/origin_data/gmall/db/favor_info_inc/2020-06-14'
into table ods_favor_info_inc
partition (dt = '2020-06-14');
-- 查询数据
select type,
ts,
data,
old,
dt
from ods_favor_info_inc
where dt = '2020-06-14';
订单明细表

DROP TABLE IF EXISTS ods_order_detail_inc;
CREATE EXTERNAL TABLE ods_order_detail_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id:STRING, order_id:STRING, sku_id:STRING, sku_name:STRING, img_url:STRING, order_price:DECIMAL(16, 2), sku_num :BIGINT, create_time:STRING, source_type:STRING, source_id:STRING, split_total_amount:DECIMAL(16, 2), split_activity_amount:DECIMAL(16, 2), split_coupon_amount:DECIMAL(16, 2)> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单明细表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_order_detail_inc/';
订单明细活动关联表

DROP TABLE IF EXISTS ods_order_detail_activity_inc;
CREATE EXTERNAL TABLE ods_order_detail_activity_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id:STRING, order_id:STRING, order_detail_id:STRING, activity_id:STRING, activity_rule_id:STRING, sku_id:STRING, create_time:STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单明细活动关联表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_order_detail_activity_inc/';
订单明细优惠券关联表

DROP TABLE IF EXISTS ods_order_detail_coupon_inc;
CREATE EXTERNAL TABLE ods_order_detail_coupon_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id:STRING, order_id:STRING, order_detail_id:STRING, coupon_id:STRING, coupon_use_id:STRING, sku_id:STRING, create_time:STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单明细优惠券关联表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_order_detail_coupon_inc/';
订单表

DROP TABLE IF EXISTS ods_order_info_inc;
CREATE EXTERNAL TABLE ods_order_info_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id:STRING, consignee:STRING, consignee_tel :STRING, total_amount:DECIMAL(16, 2), order_status:STRING, user_id:STRING, payment_way :STRING,delivery_address :STRING, order_comment :STRING, out_trade_no :STRING, trade_body:STRING, create_time :STRING,operate_time :STRING, expire_time :STRING, process_status :STRING, tracking_no:STRING, parent_order_id :STRING, img_url :STRING, province_id :STRING, activity_reduce_amount:DECIMAL(16, 2), coupon_reduce_amount :DECIMAL(16, 2), original_total_amount :DECIMAL(16, 2), freight_fee:DECIMAL(16, 2), freight_fee_reduce :DECIMAL(16, 2), refundable_time :DECIMAL(16, 2)> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '订单表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_order_info_inc/';
退单表

DROP TABLE IF EXISTS ods_order_refund_info_inc;
CREATE EXTERNAL TABLE ods_order_refund_info_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,user_id :STRING,order_id :STRING,sku_id :STRING,refund_type :STRING,refund_num :BIGINT,refund_amount:DECIMAL(16, 2),refund_reason_type :STRING,refund_reason_txt :STRING,refund_status :STRING,create_time:STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '退单表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_order_refund_info_inc/';
订单状态流水表

DROP TABLE IF EXISTS ods_order_status_log_inc;
CREATE EXTERNAL TABLE ods_order_status_log_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,order_id :STRING,order_status :STRING,operate_time :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '退单表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_order_status_log_inc/';
支付表

DROP TABLE IF EXISTS ods_payment_info_inc;
CREATE EXTERNAL TABLE ods_payment_info_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,out_trade_no :STRING,order_id :STRING,user_id :STRING,payment_type :STRING,trade_no:STRING,total_amount :DECIMAL(16, 2),subject :STRING,payment_status :STRING,create_time :STRING,callback_time:STRING,callback_content :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '支付表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_payment_info_inc/';
退款表

DROP TABLE IF EXISTS ods_refund_payment_inc;
CREATE EXTERNAL TABLE ods_refund_payment_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,out_trade_no :STRING,order_id :STRING,sku_id :STRING,payment_type :STRING,trade_no :STRING,total_amount:DECIMAL(16, 2),subject :STRING,refund_status :STRING,create_time :STRING,callback_time :STRING,callback_content:STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '退款表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_refund_payment_inc/';
用户表

DROP TABLE IF EXISTS ods_user_info_inc;
CREATE EXTERNAL TABLE ods_user_info_inc(
`type` STRING COMMENT '变动类型',
`ts` BIGINT COMMENT '变动时间',
`data` STRUCT<id :STRING,login_name :STRING,nick_name :STRING,passwd :STRING,name :STRING,phone_num :STRING,email:STRING,head_img :STRING,user_level :STRING,birthday :STRING,gender :STRING,create_time :STRING,operate_time:STRING,status :STRING> COMMENT '数据',
`old` MAP<STRING,STRING> COMMENT '旧值'
) COMMENT '用户表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'
LOCATION '/warehouse/gmall/ods/ods_user_info_inc/';
数据装载脚本
在 cpu101 的 /home/cpu/bin 目录下创建 hdfs_to_ods_db.sh
vim hdfs_to_ods_db.sh
#!/bin/bash
APP=gmall
if [ -n "$2" ] ;then
do_date=$2
else
do_date=`date -d '-1 day' +%F`
fi
load_data(){
sql=""
for i in $*; do
#判断路径是否存在
hadoop fs -test -e /origin_data/$APP/db/${i:4}/$do_date
#路径存在方可装载数据
if [[ $? = 0 ]]; then
sql=$sql"load data inpath '/origin_data/$APP/db/${i:4}/$do_date' OVERWRITE into table ${APP}.$i partition(dt='$do_date');"
fi
done
hive -e "$sql"
}
case $1 in
"ods_activity_info_full")
load_data "ods_activity_info_full"
;;
"ods_activity_rule_full")
load_data "ods_activity_rule_full"
;;
"ods_base_category1_full")
load_data "ods_base_category1_full"
;;
"ods_base_category2_full")
load_data "ods_base_category2_full"
;;
"ods_base_category3_full")
load_data "ods_base_category3_full"
;;
"ods_base_dic_full")
load_data "ods_base_dic_full"
;;
"ods_base_province_full")
load_data "ods_base_province_full"
;;
"ods_base_region_full")
load_data "ods_base_region_full"
;;
"ods_base_trademark_full")
load_data "ods_base_trademark_full"
;;
"ods_cart_info_full")
load_data "ods_cart_info_full"
;;
"ods_coupon_info_full")
load_data "ods_coupon_info_full"
;;
"ods_sku_attr_value_full")
load_data "ods_sku_attr_value_full"
;;
"ods_sku_info_full")
load_data "ods_sku_info_full"
;;
"ods_sku_sale_attr_value_full")
load_data "ods_sku_sale_attr_value_full"
;;
"ods_spu_info_full")
load_data "ods_spu_info_full"
;;
"ods_cart_info_inc")
load_data "ods_cart_info_inc"
;;
"ods_comment_info_inc")
load_data "ods_comment_info_inc"
;;
"ods_coupon_use_inc")
load_data "ods_coupon_use_inc"
;;
"ods_favor_info_inc")
load_data "ods_favor_info_inc"
;;
"ods_order_detail_inc")
load_data "ods_order_detail_inc"
;;
"ods_order_detail_activity_inc")
load_data "ods_order_detail_activity_inc"
;;
"ods_order_detail_coupon_inc")
load_data "ods_order_detail_coupon_inc"
;;
"ods_order_info_inc")
load_data "ods_order_info_inc"
;;
"ods_order_refund_info_inc")
load_data "ods_order_refund_info_inc"
;;
"ods_order_status_log_inc")
load_data "ods_order_status_log_inc"
;;
"ods_payment_info_inc")
load_data "ods_payment_info_inc"
;;
"ods_refund_payment_inc")
load_data "ods_refund_payment_inc"
;;
"ods_user_info_inc")
load_data "ods_user_info_inc"
;;
"all")
load_data "ods_activity_info_full" "ods_activity_rule_full" "ods_base_category1_full" "ods_base_category2_full" "ods_base_category3_full" "ods_base_dic_full" "ods_base_province_full" "ods_base_region_full" "ods_base_trademark_full" "ods_cart_info_full" "ods_coupon_info_full" "ods_sku_attr_value_full" "ods_sku_info_full" "ods_sku_sale_attr_value_full" "ods_spu_info_full" "ods_cart_info_inc" "ods_comment_info_inc" "ods_coupon_use_inc" "ods_favor_info_inc" "ods_order_detail_inc" "ods_order_detail_activity_inc" "ods_order_detail_coupon_inc" "ods_order_info_inc" "ods_order_refund_info_inc" "ods_order_status_log_inc" "ods_payment_info_inc" "ods_refund_payment_inc" "ods_user_info_inc"
;;
esac

增加脚本执行权限
chmod 777 hdfs_to_ods_db.sh

脚本用法 :
hdfs_to_ods_db.sh all 2020-06-14

