ODS 层的设计要点如下:
- ODS 层的表结构设计受业务系统同步过来的数据结构
- ODS 层保存全部历史数据,所以压缩格式选择压缩高 ( gzip )
- ODS 层表名的命名规范:ods表名单分区增量全量标识( inc / full )
日志表
日志数据格式
hadoop fs -cat /origin_data/gmall/log/topic_log/2020-06-14/* | zcat

{"common": { -- 环境信息"ar": "440000", -- 地区编码"ba": "Xiaomi", -- 手机品牌"ch": "vivo", -- 渠道"is_new": "1", -- 是否首日使用,首次使用的当日,该字段值为1,过了24:00,该字段置为0"md": "Xiaomi 9", -- 手机型号"mid": "mid_625416", -- 设备id"os": "Android 11.0", -- 操作系统"uid": "209", -- 会员id"vc": "v2.1.111" -- app版本号},"page": { -- 页面信息"during_time": 2815, -- 持续时间毫秒"item": "28", -- 目标id"item_type": "sku_id", -- 目标类型"last_page_id": "register", -- 上页类型"page_id": "good_detail", -- 页面ID"source_type": "promotion" -- 来源类型},"actions": [ -- 动作(事件){"action_id": "get_coupon", -- 动作id"item": "2", -- 目标id"item_type": "coupon_id", -- 目标类型"ts": 1592144742407 -- 动作时间戳}],"displays": [ -- 曝光信息{"display_type": "promotion", -- 曝光类型"item": "32", -- 曝光对象id"item_type": "sku_id", -- 曝光对象类型"order": 1, -- 出现顺序"pos_id": 1 -- 曝光位置}],"start": { -- 启动日志"entry": "icon", -- icon 手机图标 notice 通知 install 安装后启动"loading_time": 18803, -- 启动加载时间"open_ad_id": 7, -- 广告页ID"open_ad_ms": 3449, -- 广告总共播放时间"open_ad_skip_ms": 1989 -- 用户跳过广告时点},"err": { -- 错误"error_code": "1234", -- 错误码"msg": "***********" -- 错误信息},"ts": 1592144741000 -- 跳入时间戳}
建表数据
-- 删除表DROP TABLE IF EXISTS ods_log_inc;-- 创建外部表CREATE EXTERNAL TABLE ods_log_inc(`common` STRUCT<ar :STRING, ba :STRING, ch :STRING, is_new :STRING, md :STRING, mid :STRING, os :STRING, uid:STRING, vc :STRING> COMMENT '公共信息',`page` STRUCT<during_time :BIGINT, item :STRING, item_type :STRING, last_page_id :STRING, page_id :STRING,source_type :STRING> COMMENT '页面信息',`actions` ARRAY<STRUCT<action_id:STRING, item:STRING, item_type:STRING, ts:BIGINT>> COMMENT '动作信息',`displays` ARRAY<STRUCT<display_type :STRING, item :STRING, item_type :STRING, `order` :STRING, pos_id:STRING>> COMMENT '曝光信息',`start` STRUCT<entry :STRING, loading_time :BIGINT, open_ad_id :BIGINT, open_ad_ms :BIGINT, open_ad_skip_ms:BIGINT> COMMENT '启动信息',`err` STRUCT<error_code:BIGINT, msg:STRING> COMMENT '错误信息',`ts` BIGINT COMMENT '时间戳') COMMENT '活动信息表'PARTITIONED BY (`dt` STRING)ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.JsonSerDe'LOCATION '/warehouse/gmall/ods/ods_log_inc/';
数据装载
load data inpath '/origin_data/gmall/log/topic_log/2020-06-14'
into table ods_log_inc
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select common,
page,
actions,
displays,
`start`,
err,
ts,
dt
from ods_log_inc
where dt = '2020-06-14';

每日数据装载脚本
在 cpu101 的 /home/cpu/bin 目录下创建 hdfs_to_ods_log.sh
vim hdfs_to_ods_log.sh
内容 :
#!/bin/bash
# 定义变量方便修改
APP=gmall
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ]; then
do_date=$1
else
do_date=`date -d "-l day" +%F`
fi
echo ================== 日志日期为 $do_date ==================
sql="load data inpath '/origin_data/$APP/log/topic_log/$do_date'
into table ${APP}.ods_log_inc partition(dt='$do_date');"
hive -e "$sql"

增加脚本执行权限
chmod 777 hdfs_to_ods_log.sh

脚本用法 :
hdfs_to_ods_log.sh 2020-06-14
业务表
后台管理系统 :

全量表
活动信息表

-- 删除 活动信息表
DROP TABLE IF EXISTS ods_activity_info_full;
-- 创建外部 活动信息表
CREATE EXTERNAL TABLE ods_activity_info_full(
`id` STRING COMMENT '活动id',
`activity_name` STRING COMMENT '活动名称',
`activity_type` STRING COMMENT '活动类型',
`activity_desc` STRING COMMENT '活动描述',
`start_time` STRING COMMENT '开始时间',
`end_time` STRING COMMENT '结束时间',
`create_time` STRING COMMENT '创建时间'
) COMMENT '活动信息表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/gmall/ods/ods_activity_info_full/';
装载该分区数据
-- 装载数据
load data inpath '/origin_data/gmall/db/activity_info_full/2020-06-14'
into table ods_activity_info_full
partition (dt = '2020-06-14');
查询活动信息表
-- 查询 活动信息表
select id,
activity_name,
activity_type,
activity_desc,
start_time,
end_time,
create_time,
dt
from ods_activity_info_full
where dt = '2020-06-14';

活动规则表

-- 删除 活动规则表
DROP TABLE IF EXISTS ods_activity_rule_full;
-- 创建外部 活动规则表
CREATE EXTERNAL TABLE ods_activity_rule_full(
`id` STRING COMMENT '编号',
`activity_id` STRING COMMENT '类型',
`activity_type` STRING COMMENT '活动类型',
`condition_amount` DECIMAL(16, 2) COMMENT '满减金额',
`condition_num` BIGINT COMMENT '满减件数',
`benefit_amount` DECIMAL(16, 2) COMMENT '优惠金额',
`benefit_discount` DECIMAL(16, 2) COMMENT '优惠折扣',
`benefit_level` STRING COMMENT '优惠级别'
) COMMENT '活动规则表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/gmall/ods/ods_activity_rule_full/';
装载该分区数据
-- 装载数据
load data inpath '/origin_data/gmall/db/activity_rule_full/2020-06-14'
into table ods_activity_rule_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
activity_id,
activity_type,
condition_amount,
condition_num,
benefit_amount,
benefit_discount,
benefit_level,
dt
from ods_activity_rule_full
where dt = '2020-06-14';

一级品类表

-- 删除 一级品类表
drop table if exists ods_base_category1_full;
-- 创建 一级品类表
create external table ods_base_category1_full (
`id` string comment '编号',
`name` string comment '分类名称'
) comment '一级品类表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_category1_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_category1_full/2020-06-14'
into table ods_base_category1_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
name,
dt
from ods_base_category1_full
where dt = '2020-06-14';

二级品类表

-- 删除 二级品类表
drop table if exists ods_base_category2_full;
-- 创建 二级品类表
create external table ods_base_category2_full
(
`id` string comment '编号',
`name` string comment '二级分类名称',
`category1_id` string comment '一级分类编号'
) comment '二级品类表'
partitioned by (`dt` STRING)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_category2_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_category2_full/2020-06-14'
into table ods_base_category2_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
name,
category1_id,
dt
from ods_base_category2_full
where dt = '2020-06-14';

三级品类表

-- 删除 三级品类表
drop table if exists ods_base_category3_full;
-- 创建外部 三级品类表
create external table ods_base_category3_full
(
`id` string comment '编号',
`name` string comment '三级分类名称',
`category2_id` string comment '二级分类编号'
) comment '三级品类表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_category3_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_category3_full/2020-06-14'
into table ods_base_category3_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
name,
category2_id,
dt
from ods_base_category3_full
where dt = '2020-06-14';

编码字典表
-- 删除 编码字典表
drop table if exists ods_base_dic_full;
-- 创建 编码字典表
create external table ods_base_dic_full
(
`dic_code` string comment '编号',
`dic_name` string comment '编码名称',
`parent_code` string comment '父编号',
`create_time` string comment '创建日期',
`operate_time` string comment '修改日期'
) comment '编码字典表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_dic_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_dic_full/2020-06-14'
into table ods_base_dic_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select dic_code,
dic_name,
parent_code,
create_time,
operate_time,
dt
from ods_base_dic_full
where dt = '2020-06-14';

省份表

-- 删除 省份表
drop table if exists ods_base_province_full;
-- 创建 省份表
create external table ods_base_province_full
(
`id` string comment '编号',
`name` string comment '省份名称',
`region_id` string comment '地区ID',
`area_code` string comment '地区编码',
`iso_code` string comment '旧版ISO-3166-2编码,供可视化使用',
`iso_3166_2` string comment '新版IOS-3166-2编码,供可视化使用'
) comment '省份表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_province_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_province_full/2020-06-14'
into table ods_base_province_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
name,
region_id,
area_code,
iso_code,
iso_3166_2,
dt
from ods_base_province_full
where dt = '2020-06-14';

地区表

-- 删除 地区表
drop table if exists ods_base_region_full;
-- 创建 地区表
create external table ods_base_region_full
(
`id` string comment '编号',
`region_name` string comment '地区名称'
) comment ''
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_base_region_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/base_region_full/2020-06-14'
into table ods_base_region_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
region_name,
dt
from ods_base_region_full
where dt = '2020-06-14';

品牌表

DROP TABLE IF EXISTS ods_base_trademark_full;
CREATE EXTERNAL TABLE ods_base_trademark_full(
`id` STRING COMMENT '编号',
`tm_name` STRING COMMENT '品牌名称',
`logo_url` STRING COMMENT '品牌logo的图片路径'
) COMMENT '品牌表'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/gmall/ods/ods_base_trademark_full/';
装载数据
load data inpath '/origin_data/gmall/db/base_trademark_full/2020-06-14'
into table ods_base_trademark_full
partition (dt = '2020-06-14');
查询数据
select id,
tm_name,
logo_url,
dt
from ods_base_trademark_full
where dt = '2020-06-14';

购物车表

-- 删除 购物车表
drop table if exists ods_cart_info_full;
-- 创建 购物车表
create external table ods_cart_info_full
(
`id` string comment '编号',
`user_id` string comment '用户id',
`sku_id` string comment 'sku_id',
`cart_price` decimal(16, 2) comment '放入购物车时价格',
`sku_num` bigint comment '数量',
`img_url` string comment '商品图片地址',
`sku_name` string comment 'sku名称 (冗余)',
`is_checked` string comment '是否被选中',
`create_time` string comment '创建时间',
`operate_time` string comment '修改时间',
`is_ordered` string comment '是否已经下单',
`order_time` string comment '下单时间',
`source_type` string comment '来源类型',
`source_id` string comment '来源编号'
) comment '购物车全量表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_cart_info_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/cart_info_full/2020-06-14'
into table ods_cart_info_full
partition (dt = '2020-06-14');
查询数据
select id,
user_id,
sku_id,
cart_price,
sku_num,
img_url,
sku_name,
is_checked,
create_time,
operate_time,
is_ordered,
order_time,
source_type,
source_id,
dt
from ods_cart_info_full
where dt = '2020-06-14';

优惠券信息表

-- 删除 优惠券信息表
drop table if exists ods_coupon_info_full;
-- 创建 优惠券信息表
create external table ods_coupon_info_full
(
`id` string comment '购物券编号',
`coupon_name` string comment '购物券名称',
`coupon_type` string comment '购物券类型 1 现金券 2 折扣券 3 满减券 4 满件打折券',
`condition_amount` DECIMAL(16, 2) COMMENT '满额数',
`condition_num` BIGINT COMMENT '满件数',
`activity_id` STRING COMMENT '活动编号',
`benefit_amount` DECIMAL(16, 2) COMMENT '减金额',
`benefit_discount` DECIMAL(16, 2) COMMENT '折扣',
`create_time` STRING COMMENT '创建时间',
`range_type` STRING COMMENT '范围类型 1、商品 2、品类 3、品牌',
`limit_num` BIGINT COMMENT '最多领用次数',
`taken_count` BIGINT COMMENT '已领用次数',
`start_time` STRING COMMENT '开始领取时间',
`end_time` STRING COMMENT '结束领取时间',
`operate_time` STRING COMMENT '修改时间',
`expire_time` STRING COMMENT '过期时间'
) comment '优惠券信息表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_coupon_info_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/coupon_info_full/2020-06-14'
into table ods_coupon_info_full
partition (dt = '2020-06-14');
-- 查询数据
select id,
coupon_name,
coupon_type,
condition_amount,
condition_num,
activity_id,
benefit_amount,
benefit_discount,
create_time,
range_type,
limit_num,
taken_count,
start_time,
end_time,
operate_time,
expire_time,
dt
from ods_coupon_info_full
where dt = '2020-06-14';

商品平台属性表

创建 商品平台属性表
-- 删除 商品平台属性表
drop table if exists ods_sku_attr_value_full;
-- 创建 商品平台属性表
create external table ods_sku_attr_value_full
(
`id` string comment '编号',
`attr_id` string comment '平台属性ID',
`value_id` string comment '平台属性值ID',
`sku_id` string comment '商品ID',
`attr_name` string comment '平台属性名称',
`values_name` string comment '平台属性值名称'
) comment 'sku平台属性表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_sku_attr_value_full/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/sku_attr_value_full/2020-06-14'
into table ods_sku_attr_value_full
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select id,
attr_id,
value_id,
sku_id,
attr_name,
values_name,
dt
from ods_sku_attr_value_full
where dt = '2020-06-14';

商品表

-- 删除 商品表
drop table if exists ods_sku_info_full;
-- 创建 商品表
create external table ods_sku_info_full
(
`id` string comment 'skuId',
`spu_id` string comment 'spuid',
`price` decimal(16, 2) comment '价格',
`sku_name` string comment '商品名称',
`sku_desc` string comment '商品描述',
`weight` decimal(16, 2) comment '重量',
`tm_id` string comment '品牌id',
`category3_id` string comment '品类id',
`sku_default_igm` string comment '商品图片地址',
`is_sale` string comment '是否在售',
`create_time` string comment '创建时间'
) comment 'SKU商品表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_sku_info_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/sku_info_full/2020-06-14'
into table ods_sku_info_full
partition (dt = '2020-06-14');
-- 查询数据
select id,
spu_id,
price,
sku_name,
sku_desc,
weight,
tm_id,
category3_id,
sku_default_igm,
is_sale,
create_time,
dt
from ods_sku_info_full
where dt = '2020-06-14';

商品销售属性值表

-- 删除 商品销售属性值表
DROP TABLE IF EXISTS ods_sku_sale_attr_value_full;
-- 创建 商品销售属性值表
CREATE EXTERNAL TABLE ods_sku_sale_attr_value_full(
`id` STRING COMMENT '编号',
`sku_id` STRING COMMENT 'sku_id',
`spu_id` STRING COMMENT 'spu_id',
`sale_attr_value_id` STRING COMMENT '销售属性值id',
`sale_attr_id` STRING COMMENT '销售属性id',
`sale_attr_name` STRING COMMENT '销售属性名称',
`sale_attr_value_name` STRING COMMENT '销售属性值名称'
) COMMENT 'sku销售属性名称'
PARTITIONED BY (`dt` STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
NULL DEFINED AS ''
LOCATION '/warehouse/gmall/ods/ods_sku_sale_attr_value_full/';
-- 装载数据
load data inpath '/origin_data/gmall/db/sku_sale_attr_value_full/2020-06-14'
into table ods_sku_sale_attr_value_full
partition (dt = '2020-06-14');
-- 查询数据
select id,
sku_id,
spu_id,
sale_attr_value_id,
sale_attr_id,
sale_attr_name,
sale_attr_value_name,
dt
from ods_sku_sale_attr_value_full
where dt = '2020-06-14';

SPU表

-- 删除 SPU表
drop table if exists ods_spu_info_full;
-- 创建 SPU表
create external table ods_spu_info_full
(
`id` STRING COMMENT 'spu_id',
`spu_name` STRING COMMENT 'spu名称',
`description` STRING COMMENT '描述信息',
`category3_id` STRING COMMENT '品类id',
`tm_id` STRING COMMENT '品牌id'
) comment 'SPU商品表'
partitioned by (`dt` string)
row format delimited fields terminated by '\t'
null defined as ''
location '/warehouse/gmall/ods/ods_spu_info_full/';
-- 加载数据
load data inpath '/origin_data/gmall/db/spu_info_full/2020-06-14'
into table ods_spu_info_full
partition (dt = '2020-06-14');
-- 查询
select id,
spu_name,
description,
category3_id,
tm_id,
dt
from ods_spu_info_full
where dt = '2020-06-14';

增量表
电商业务表 :

购物车表

hadoop fs -cat /origin_data/gmall/db/cart_info_inc/2020-06-14 | zcat

sql :
-- 删除购物车表
drop table if exists ods_cart_info_inc;
-- 创建 购物车表
create external table ods_cart_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, user_id :string,sku_id :string, cart_price :decimal(16, 2), sku_num :bigint,img_url
:string,sku_name :string,is_checked :string,create_time :string, operate_time :string,is_ordered
:string,order_time :string,source_type :string,source_id :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '购物车增量表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_cart_info_inc/';
-- 转载数据
load data inpath '/origin_data/gmall/db/cart_info_inc/2020-06-14'
overwrite into table ods_cart_info_inc
partition (dt = '2020-06-14');
-- 查询数据
set hive.execution.engine = spark;
select type,
ts,
data.id,
data.user_id,
data.sku_id,
data.create_time,
data.source_id,
data.source_type,
data.sku_num,
old['id'] as old_id,
old['user_id'] as old_user_id,
old['sku_id'] as old_sku_id,
old['create_time'] as old_sku_id,
old['source_id'] as old_source_id,
old['source_type'] as old_source_type,
old['sku_num'] as old_sku_num,
dt
from ods_cart_info_inc
where dt = '2020-06-14'
limit 100;

评论表

-- 删除评论表
drop table if exists ods_comment_info_inc;
-- 创建 评论表
create external table ods_comment_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, user_id :string, nick_name :string, head_img :string, sku_id :string,spu_id :string,
order_id :string, appraise :string, comment_txt :string, create_time :string, operate_time
:string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '评论表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_comment_info_inc/';
-- 装载数据
load data inpath '/origin_data/gmall/db/comment_info_inc/2020-06-14'
into table ods_comment_info_inc
partition (dt = '2020-06-14');
-- 查询数据
select type,
ts,
data.id,
data.user_id,
data.nick_name,
data.head_img,
data.sku_id,
data.spu_id,
data.order_id,
data.appraise,
data.comment_txt,
data.create_time,
data.operate_time
old,
dt
from ods_comment_info_inc
where dt = '2020-06-14';
优惠券领用表

建表
-- 删除 优惠券领用表
drop table if exists ods_coupon_use_inc;
-- 创建 优惠券领用表
create external table ods_coupon_use_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, coupon_id :string, user_id :string, order_id :string, coupon_status :string, get_time
:string, using_time :string, used_time :string, expire_time :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '优惠券领用表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_coupon_use_inc/';
装载数据
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/coupon_use_inc/${do_date}'
into table ods_coupon_use_inc
partition (dt = '${do_date}');
查询数据
set hivevar:do_date = 2020-06-14;
select type,
ts,
data.id,
data.coupon_id,
data.user_id,
data.order_id,
data.coupon_status,
data.get_time,
data.using_time,
data.used_time,
data.expire_time,
old,
old['id'] as old_id,
old['coupon_id'] as old_coupon_id,
old['user_id'] as old_user_id,
old['order_id'] as old_order_id,
old['coupon_status'] as old_coupon_status,
old['get_time'] as old_get_time,
old['using_time'] as old_using_time,
old['used_time'] as old_used_time,
old['expire_time'] as old_expire_time,
dt
from ods_coupon_use_inc
where dt = '${do_date}';

收藏表

建表
-- 删除 收藏表
drop table if exists ods_favor_info_inc;
-- 创建 收藏表
create external table ods_favor_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, user_id :string, sku_id :string, spu_id :string, is_cancel :string, create_time :string,
cancel_time :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '收藏表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_favor_info_inc/';
装载数据
-- 装载数据
load data inpath '/origin_data/gmall/db/favor_info_inc/2020-06-14'
into table ods_favor_info_inc
partition (dt = '2020-06-14');
查询数据
-- 查询数据
select type,
ts,
data,
old,
dt
from ods_favor_info_inc
where dt = '2020-06-14';

订单明细表

-- 创建 订单明细表
drop table if exists ods_order_detail_inc;
create external table ods_order_detail_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, order_id :string, sku_id :string, sku_name :string, img_url :string, order_price
:decimal(16, 2), sku_num :bigint, create_time :string, source_type :string, source_id :string,
split_total_amount :decimal(16, 2), split_activity_amount :decimal(16, 2), split_coupon_amount
:decimal(16, 2)> comment '数据',
`old` map<string,string> comment '旧值'
) comment '订单明细表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_order_detail_inc/';
装载数据
-- 装载数据
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/order_detail_inc/${do_date}'
into table ods_order_detail_inc
partition (dt = '${do_date}');
查询数据
-- 查询数据
set hivevar:do_date = 2020-06-14;
select type,
ts,
data.id,
data.order_id,
data.sku_id,
data.sku_name,
data.img_url,
data.order_price,
data.sku_num,
data.create_time,
data.source_type,
data.source_id,
data.split_total_amount,
data.split_activity_amount,
data.split_coupon_amount,
old,
dt
from ods_order_detail_inc
where dt = '${do_date}'
limit 10;

订单明细活动关联表

-- 创建 订单明细活动关联表
drop table if exists ods_order_detail_activity_inc;
create external table ods_order_detail_activity_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, order_id :string, order_detail_id :string, activity_id :string, activity_rule_id :string,
sku_id :string, create_time :string> comment '数据',
`old` map<string,string> COMMENT '旧值'
) comment '订单明细活动关联表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_order_detail_activity_inc/';
-- 装载数据
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/order_detail_activity_inc/${do_date}'
into table ods_order_detail_activity_inc
partition (dt = '${do_date}');
-- 查询数据
select type,
ts,
data.id ,
data.order_id ,
data.order_detail_id ,
data.activity_id ,
data.activity_rule_id ,
data.sku_id ,
data.create_time ,
old,
dt
from ods_order_detail_activity_inc
where dt = '${do_date}'
limit 10;

订单明细优惠券关联表

-- 创建 订单明细优惠券关联表
drop table if exists ods_order_detail_coupon_inc;
create external table ods_order_detail_coupon_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string, order_id :string, order_detail_id :string, coupon_id :string, coupon_use_id :string,
sku_id :string, create_time :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '订单明细优惠券关联表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_order_detail_coupon_inc/';
装载数据 :
-- 装载数据
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/order_detail_coupon_inc/${do_date}'
into table ods_order_detail_coupon_inc
partition (dt = '${do_date}');
查询数据 :
select type,
ts,
data.id,
data.order_id,
data.order_detail_id,
data.coupon_id,
data.coupon_use_id,
data.sku_id,
data.create_time,
old
from ods_order_detail_coupon_inc
where dt = '${do_date}';

订单表

-- 创建订单表
drop table if exists ods_order_info_inc;
create external table ods_order_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id, data.consignee, data.consignee_tel, data.total_amount, data.order_status, data.user_id, data.
payment_way,delivery_address, data.order_comment, out_trade_no, data.trade_body, data.create_time,operate_time,
data.expire_time, data.process_status, data.tracking_no, data.parent_order_id, data.img_url,
province_id, data.activity_reduce_amount, data.coupon_reduce_amount, original_total_amount, data.
freight_fee, data.freight_fee_reduce, data.refundable_time> comment '数据',
`old` map<string,string> comment '旧值'
) comment '订单表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_order_info_inc/';
装载数据
-- 装载数据
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/order_info_inc/${do_date}'
into table ods_order_info_inc
partition (dt = '${do_date}');
查询数据
set hivevar:do_date = 2020-06-14;
select type,
ts,
data.id,
data.consignee,
data.consignee_tel,
data.total_amount,
data.order_status,
data.user_id,
data.payment_way,
data.delivery_address,
data.order_comment,
data.out_trade_no,
data.trade_body,
data.create_time,
data.operate_time,
data.expire_time,
data.process_status,
data.tracking_no,
data.parent_order_id,
data.img_url,
data.province_id,
data.activity_reduce_amount,
data.coupon_reduce_amount,
data.original_total_amount,
data.freight_fee,
data.freight_fee_reduce,
data.refundable_time,
old,
dt
from ods_order_info_inc
where dt = '${do_date}'
limit 10;

退单表

-- 退单表
drop table if exists ods_order_refund_info_inc;
create external table ods_order_refund_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string,user_id :string,order_id :string,sku_id :string,refund_type :string,refund_num :bigint,refund_amount
:decimal(16, 2),refund_reason_type :string,refund_reason_txt :string,refund_status :string,create_time
:string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '退单表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_order_refund_info_inc/';
装载数据
-- 装载数据
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/order_refund_info_inc/${do_date}'
into table ods_order_refund_info_inc
partition (dt = '${do_date}');
查询数据
-- 查询数据
select type,
ts,
data.id,
data.user_id,
data.order_id,
data.sku_id,
data.refund_type,
data.refund_num,
data.refund_amount,
data.refund_reason_type,
data.refund_reason_txt,
data.refund_status,
data.create_time,
old,
dt
from ods_order_refund_info_inc
where dt = '${do_date}'
limit 10;

订单状态流水表

-- 创建订单状态流水表
drop table if exists ods_order_status_log_inc;
create external table ods_order_status_log_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string,order_id :string,order_status :string,operate_time :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '订单状态流水表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_order_status_log_inc/';
-- 订单状态流水表
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/order_status_log_inc/${do_date}'
into table ods_order_status_log_inc
partition (dt = '${do_date}');
-- 查询数据
select type,
ts,
data.id,
data.order_id,
data.order_status,
data.operate_time,
old,
dt
from ods_order_status_log_inc
where dt = '${do_date}'
limit 10;

支付表

-- 查询支付表
drop table if exists ods_payment_info_inc;
create external table ods_payment_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string,out_trade_no :string,order_id :string,user_id :string,payment_type :string,trade_no
:string,total_amount :decimal(16, 2),subject :string,payment_status :string,create_time :string,callback_time
:string,callback_content :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '支付表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_payment_info_inc/';
-- 订单状态流水表
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/payment_info_inc/${do_date}'
into table ods_payment_info_inc
partition (dt = '${do_date}');
-- 查询数据
select type,
ts,
data.id,
data.out_trade_no,
data.order_id,
data.user_id,
data.payment_type,
data.trade_no,
data.total_amount,
data.subject,
data.payment_status,
data.create_time,
data.callback_time,
data.callback_content,
old,
dt
from ods_payment_info_inc
where dt = '${do_date}'
limit 10;

退款表

-- 退款表
drop table if exists ods_refund_payment_inc;
create external table ods_refund_payment_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string,out_trade_no :string,order_id :string,sku_id :string,payment_type :STRING,trade_no :string,total_amount
:decimal(16, 2),subject :string,refund_status :string,create_time :string,callback_time :string,callback_content
:string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '退款表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_refund_payment_inc/';
-- 装载数据
set hivevar:do_date = 2020-06-14;
load data inpath '/origin_data/gmall/db/refund_payment_inc/${do_date}'
into table ods_refund_payment_inc
partition (dt = '${do_date}');
-- 查询数据
select type,
ts,
data.id,
data.out_trade_no,
data.order_id,
data.sku_id,
data.payment_type,
data.trade_no,
data.total_amount,
data.subject,
data.refund_status,
data.create_time,
data.callback_time,
data.callback_content,
old,
dt
from ods_refund_payment_inc
where dt = '${do_date}'
limit 10;

用户表

-- 创建用户表
drop table if exists ods_user_info_inc;
create external table ods_user_info_inc
(
`type` string comment '变动类型',
`ts` bigint comment '变动时间',
`data` struct<id :string,login_name :string,nick_name :string,passwd :string,name :string,phone_num :string,email
:string,head_img :string,user_level :string,birthday :string,gender :string,create_time :string,operate_time
:string,status :string> comment '数据',
`old` map<string,string> comment '旧值'
) comment '用户表'
partitioned by (`dt` string)
row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
location '/warehouse/gmall/ods/ods_user_info_inc/';
-- 装载数据
load data inpath '/origin_data/gmall/db/user_info_inc/2020-06-14'
into table ods_user_info_inc
partition (dt = '2020-06-14');
-- 查询数据
select type,
ts,
data.id,
data.login_name,
data.nick_name,
data.passwd,
data.name,
data.phone_num,
data.email,
data.head_img,
data.user_level,
data.birthday,
data.gender,
data.create_time,
data.operate_time,
data.status,
old,
dt
from ods_user_info_inc
where dt = '2020-06-14'
limit 10;

数据装载脚本
在 cpu101 的 /home/cpu/bin 目录下创建 hdfs_to_ods_db.sh
vim hdfs_to_ods_db.sh
#!/bin/bash
APP=gmall
if [ -n "$2" ] ;then
do_date=$2
else
do_date=`date -d '-1 day' +%F`
fi
load_data(){
sql=""
for i in $*; do
#判断路径是否存在
hadoop fs -test -e /origin_data/$APP/db/${i:4}/$do_date
#路径存在方可装载数据
if [[ $? = 0 ]]; then
sql=$sql"load data inpath '/origin_data/$APP/db/${i:4}/$do_date' OVERWRITE into table ${APP}.$i partition(dt='$do_date');"
fi
done
hive -e "$sql"
}
case $1 in
"ods_activity_info_full")
load_data "ods_activity_info_full"
;;
"ods_activity_rule_full")
load_data "ods_activity_rule_full"
;;
"ods_base_category1_full")
load_data "ods_base_category1_full"
;;
"ods_base_category2_full")
load_data "ods_base_category2_full"
;;
"ods_base_category3_full")
load_data "ods_base_category3_full"
;;
"ods_base_dic_full")
load_data "ods_base_dic_full"
;;
"ods_base_province_full")
load_data "ods_base_province_full"
;;
"ods_base_region_full")
load_data "ods_base_region_full"
;;
"ods_base_trademark_full")
load_data "ods_base_trademark_full"
;;
"ods_cart_info_full")
load_data "ods_cart_info_full"
;;
"ods_coupon_info_full")
load_data "ods_coupon_info_full"
;;
"ods_sku_attr_value_full")
load_data "ods_sku_attr_value_full"
;;
"ods_sku_info_full")
load_data "ods_sku_info_full"
;;
"ods_sku_sale_attr_value_full")
load_data "ods_sku_sale_attr_value_full"
;;
"ods_spu_info_full")
load_data "ods_spu_info_full"
;;
"ods_cart_info_inc")
load_data "ods_cart_info_inc"
;;
"ods_comment_info_inc")
load_data "ods_comment_info_inc"
;;
"ods_coupon_use_inc")
load_data "ods_coupon_use_inc"
;;
"ods_favor_info_inc")
load_data "ods_favor_info_inc"
;;
"ods_order_detail_inc")
load_data "ods_order_detail_inc"
;;
"ods_order_detail_activity_inc")
load_data "ods_order_detail_activity_inc"
;;
"ods_order_detail_coupon_inc")
load_data "ods_order_detail_coupon_inc"
;;
"ods_order_info_inc")
load_data "ods_order_info_inc"
;;
"ods_order_refund_info_inc")
load_data "ods_order_refund_info_inc"
;;
"ods_order_status_log_inc")
load_data "ods_order_status_log_inc"
;;
"ods_payment_info_inc")
load_data "ods_payment_info_inc"
;;
"ods_refund_payment_inc")
load_data "ods_refund_payment_inc"
;;
"ods_user_info_inc")
load_data "ods_user_info_inc"
;;
"all")
load_data "ods_activity_info_full" "ods_activity_rule_full" "ods_base_category1_full" "ods_base_category2_full" "ods_base_category3_full" "ods_base_dic_full" "ods_base_province_full" "ods_base_region_full" "ods_base_trademark_full" "ods_cart_info_full" "ods_coupon_info_full" "ods_sku_attr_value_full" "ods_sku_info_full" "ods_sku_sale_attr_value_full" "ods_spu_info_full" "ods_cart_info_inc" "ods_comment_info_inc" "ods_coupon_use_inc" "ods_favor_info_inc" "ods_order_detail_inc" "ods_order_detail_activity_inc" "ods_order_detail_coupon_inc" "ods_order_info_inc" "ods_order_refund_info_inc" "ods_order_status_log_inc" "ods_payment_info_inc" "ods_refund_payment_inc" "ods_user_info_inc"
;;
esac

增加脚本执行权限
chmod 777 hdfs_to_ods_db.sh

脚本用法 :
hdfs_to_ods_db.sh all 2020-06-14

