hive 调度脚本
一、load to ods
不带kerbo认证
#!/bin/bashAPP=gmallhive=/opt/module/hive/bin/hive# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天if [ -n "$1" ] ;thendo_date=$1elsedo_date=`date -d "-1 day" +%F`fisql="load data inpath '/origin_data/$APP/db/order_info/$do_date' OVERWRITE into table "$APP".ods_order_info partition(dt='$do_date');load data inpath '/origin_data/$APP/db/order_detail/$do_date' OVERWRITE into table "$APP".ods_order_detail partition(dt='$do_date');load data inpath '/origin_data/$APP/db/sku_info/$do_date' OVERWRITE into table "$APP".ods_sku_info partition(dt='$do_date');load data inpath '/origin_data/$APP/db/user_info/$do_date' OVERWRITE into table "$APP".ods_user_info partition(dt='$do_date');load data inpath '/origin_data/$APP/db/payment_info/$do_date' OVERWRITE into table "$APP".ods_payment_info partition(dt='$do_date');load data inpath '/origin_data/$APP/db/base_category1/$do_date' OVERWRITE into table "$APP".ods_base_category1 partition(dt='$do_date');load data inpath '/origin_data/$APP/db/base_category2/$do_date' OVERWRITE into table "$APP".ods_base_category2 partition(dt='$do_date');load data inpath '/origin_data/$APP/db/base_category3/$do_date' OVERWRITE into table "$APP".ods_base_category3 partition(dt='$do_date');"$hive -e "$sql"
带kerbo认证
#!/bin/bash
kinit -kt /var/lib/hive/hive.keytab hive/hive
# 定义变量方便修改
APP=gmall
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
echo "===日志日期为 $do_date==="
sql="
load data inpath '/origin_data/gmall/log/topic_start/$do_date' into table "$APP".ods_start_log partition(dt='$do_date');
"
beeline -u "jdbc:hive2://hadoop105:10000/;principal=hive/hadoop105@ATGUIGU.COM" -n hive -e "$sql"
二、ods to dwd
#!/bin/bash
hive=/opt/module/hive/bin/hive
APP=gmall
if [ -n "$1" ];then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
echo "-----------导入日期$do_date-----------"
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table "$APP".dwd_user_info partition(dt)
select id,
name,
birthday,
gender,
email,
user_level,
create_time,
dt
from "$APP".ods_user_info
where dt='$do_date' and id is not null;
insert overwrite table "$APP".dwd_sku_info partition(dt)
select sku.id,
sku.spu_id,
sku.price,
sku.sku_name,
sku.sku_desc,
sku.weight,
sku.tm_id,
c3.id category3_id,
c2.id category2_id,
c1.id category1_id,
c3.name category3_name,
c2.name category2_name,
c1.name category1_name,
sku.create_time,
sku.dt
from "$APP".ods_sku_info sku
left join
"$APP".ods_base_category3 c3 on sku.category3_id=c3.id
left join
"$APP".ods_base_category2 c2 on c3.category2_id=c2.id
left join
"$APP".ods_base_category1 c1 on c2.category1_id=c1.id
where sku.dt='$do_date' and sku.id is not null;
insert overwrite table "$APP".dwd_payment_info partition(dt)
select id,
out_trade_no,
order_id,
user_id,
alipay_trade_no,
total_amount,
subject,
payment_type,
payment_time,
dt
from "$APP".ods_payment_info
where dt='$do_date' and id is not null;
insert overwrite table "$APP".dwd_order_detail partition(dt)
select id,
order_id,
sku_id,
sku_name,
img_url,
order_price,
sku_num,
create_time,
dt
from "$APP".ods_order_detail
where dt='$do_date' and id is not null;
insert overwrite table "$APP".dwd_order_info partition(dt)
select id,
total_amount,
order_status,
user_id,
payment_way,
out_trade_no,
create_time,
operate_time,
dt
from "$APP".ods_order_info
where dt='$do_date' and id is not null;
"
$hive -e "$sql"
三、dwd to dws
#!/bin/bash
# 定义变量方便修改
APP=gmall
hive=/opt/module/hive/bin/hive
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
with
tmp_order as
(
select
user_id,
sum(oi.total_amount) order_amount,
count(*) order_count
from "$APP".dwd_order_info oi
where date_format(oi.create_time,'yyyy-MM-dd')='$do_date'
group by user_id
) ,
tmp_payment as
(
select
user_id,
sum(pi.total_amount) payment_amount,
count(*) payment_count
from "$APP".dwd_payment_info pi
where date_format(pi.payment_time,'yyyy-MM-dd')='$do_date'
group by user_id
),
tmp_comment as
(
select
user_id,
count(*) comment_count
from "$APP".dwd_comment_log c
where date_format(c.dt,'yyyy-MM-dd')='$do_date'
group by user_id
)
Insert overwrite table "$APP".dws_user_action partition(dt='$do_date')
select
user_actions.user_id,
sum(user_actions.order_count),
sum(user_actions.order_amount),
sum(user_actions.payment_count),
sum(user_actions.payment_amount),
sum(user_actions.comment_count)
from
(
select
user_id,
order_count,
order_amount,
0 payment_count,
0 payment_amount,
0 comment_count
from tmp_order
union all
select
user_id,
0,
0,
payment_count,
payment_amount,
0
from tmp_payment
union all
select
user_id,
0,
0,
0,
0,
comment_count
from tmp_comment
) user_actions
group by user_id;
"
$hive -e "$sql"
四、dws to ads
#!/bin/bash
# 定义变量方便修改
APP=gmall
hive=/opt/module/hive/bin/hive
# 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
if [ -n "$1" ] ;then
do_date=$1
else
do_date=`date -d "-1 day" +%F`
fi
sql="
set hive.exec.dynamic.partition.mode=nonstrict;
insert into table "$APP".ads_sale_tm_category1_stat_mn
select
mn.sku_tm_id,
mn.sku_category1_id,
mn.sku_category1_name,
sum(if(mn.order_count>=1,1,0)) buycount,
sum(if(mn.order_count>=2,1,0)) buyTwiceLast,
sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0)) buyTwiceLastRatio,
sum(if(mn.order_count>=3,1,0)) buy3timeLast,
sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0)) buy3timeLastRatio ,
date_format('$do_date' ,'yyyy-MM') stat_mn,
'$do_date' stat_date
from
(
select
user_id,
od.sku_tm_id,
od.sku_category1_id,
od.sku_category1_name,
sum(order_count) order_count
from "$APP".dws_sale_detail_daycount od
where date_format(dt,'yyyy-MM')=date_format('$do_date' ,'yyyy-MM')
group by user_id, od.sku_tm_id, od.sku_category1_id, od.sku_category1_name
) mn
group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
"
$hive -e "$sql"
sqoop 调度脚本
一、sqoop import
#!/bin/bash
#kerb 认证
kinit -kt /var/lib/hive/hive.keytab hive/hive
db_date=$2
echo $db_date
db_name=gmall
#指定hadoop user
HADOOP_USER_NAME=hive
import_data() {
sqoop import \
--connect jdbc:mysql://hadoop105:3306/$db_name \
--username root \
--password 000000 \
--target-dir /origin_data/$db_name/db/$1/$db_date \
--delete-target-dir \
--num-mappers 1 \
--fields-terminated-by "\t" \
--query "$2"' and $CONDITIONS;'
}
import_sku_info(){
import_data "sku_info" "select
id, spu_id, price, sku_name, sku_desc, weight, tm_id,
category3_id, create_time
from sku_info where 1=1"
}
import_user_info(){
import_data "user_info" "select
id, name, birthday, gender, email, user_level,
create_time
from user_info where 1=1"
}
import_base_category1(){
import_data "base_category1" "select
id, name from base_category1 where 1=1"
}
import_base_category2(){
import_data "base_category2" "select
id, name, category1_id from base_category2 where 1=1"
}
import_base_category3(){
import_data "base_category3" "select id, name, category2_id from base_category3 where 1=1"
}
import_order_detail(){
import_data "order_detail" "select
od.id,
order_id,
user_id,
sku_id,
sku_name,
order_price,
sku_num,
o.create_time
from order_info o , order_detail od
where o.id=od.order_id
and DATE_FORMAT(create_time,'%Y-%m-%d')='$db_date'"
}
import_payment_info(){
import_data "payment_info" "select
id,
out_trade_no,
order_id,
user_id,
alipay_trade_no,
total_amount,
subject,
payment_type,
payment_time
from payment_info
where DATE_FORMAT(payment_time,'%Y-%m-%d')='$db_date'"
}
import_order_info(){
import_data "order_info" "select
id,
total_amount,
order_status,
user_id,
payment_way,
out_trade_no,
create_time,
operate_time
from order_info
where (DATE_FORMAT(create_time,'%Y-%m-%d')='$db_date' or DATE_FORMAT(operate_time,'%Y-%m-%d')='$db_date')"
}
case $1 in
"base_category1")
import_base_category1
;;
"base_category2")
import_base_category2
;;
"base_category3")
import_base_category3
;;
"order_info")
import_order_info
;;
"order_detail")
import_order_detail
;;
"sku_info")
import_sku_info
;;
"user_info")
import_user_info
;;
"payment_info")
import_payment_info
;;
"all")
import_base_category1
import_base_category2
import_base_category3
import_order_info
import_order_detail
import_sku_info
import_user_info
import_payment_info
;;
esac
二、sqoop export
#!/bin/bash
db_name=gmall
export_data() {
sqoop export \
--connect "jdbc:mysql://hadoop105:3306/${db_name}?useUnicode=true&characterEncoding=utf-8" \
--username root \
--password 000000 \
--table $1 \
--num-mappers 1 \
--export-dir /user/hive/warehouse/$db_name/ads/$1 \
--input-fields-terminated-by "\t" \
--update-mode allowinsert \
--update-key "tm_id,category1_id,stat_mn,stat_date" \
--input-null-string '\\N' \
--input-null-non-string '\\N'
}
case $1 in
"ads_gmv_sum_day")
export_data "ads_gmv_sum_day"
;;
"all")
export_data "ads_gmv_sum_day"
;;
esac
