- 节点间的参数传递
https://www.cnblogs.com/xing901022/p/6501448.html - shell action 传递参数示例
https://www.yuque.com/huioo/wdag30/of5v7u#44d0z - java action 传递参数示例
http://www.myexception.cn/open-source/1306509.html
oracle的某时间段内数据导入
预先创建hive数据库,该hdfs目录作为数据导入目录。

```bash
$ cd ~/Documents/hive/
$ bin/hive --service metastore
$ bin/hive
hive> create database oracle;
hive> dfs -ls /user/hive/warehouse;
drwxr-xr-x   - jack supergroup          0 2020-03-30 22:43 /user/hive/warehouse/oracle.db
```
<a name="tXyoo"></a>
## java action传参

java程序:接收运行参数开始时间、时间间隔,据此计算导入时间区间。参考oozie自带示例"java-main"。

```bash
$ cp -r examples/apps/java-main/ oozie-apps/java-main-capture
$ touch lib/MyDateRangeWithOozie.java
```
MyDateRangeWithOozie.java
1.7版本
import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.io.OutputStream;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.Date;import java.util.Properties;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.conf.Configuration;public class MyDateRangeWithOozie {private static final String OOZIE_ACTION_OUTPUT_PROPERTIES = "oozie.action.output.properties";private static final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");public static String addHoursFromStart(String start, int hours) throws ParseException {Date date = format.parse(start);long time = date.getTime();date.setTime(time + hours * 60 * 60 * 1000);String end = format.format(date);return end;}public static int getFileNum(FileSystem fs, Path path) throws IOException {if (!fs.exists(path))return 0;FileStatus[] files = fs.listStatus(path);return files.length;}public static void main(String[] args) throws IOException, ParseException {// System.out.println(addHoursFromStart("2016-3-12 00:00:00", 12));// 数据导入目录 时间间隔 开始时间if (args.length != 3) {System.out.println("Usage: MyDateRangeWithOozie " +"<target-dir path> " +"<start datetime> " +"<interval hour>");System.exit(1);}System.out.println("# Arguments: " + args.length);for (int i = 0; i < args.length; i++) {System.out.println("Argument[" + i + "]: " + args[i]);}// /user/sqoop/userString dirPath = args[0];// 2int hours = Integer.parseInt(args[1]);// 2016-3-12 00:00:00String start = args[2];String end;Configuration conf = new Configuration();FileSystem fs = FileSystem.get(conf);Path path = new Path(dirPath);int num = getFileNum(fs, path);end = addHoursFromStart(start, hours * (num+1));start = addHoursFromStart(start, hours * num);String oozieProp = System.getProperty(OOZIE_ACTION_OUTPUT_PROPERTIES);if (oozieProp != null) {File propFile = new File(oozieProp);Properties props = new 
Properties();props.setProperty("start", start);props.setProperty("end", end);OutputStream os = new FileOutputStream(propFile);props.store(os, "");os.flush();os.close();} elsethrow new RuntimeException(OOZIE_ACTION_OUTPUT_PROPERTIES+ " System property not defined");}}
workflow.xml
- 注意start、end的值。
```xml
<java>
    <job-tracker>${jobTracker}</job-tracker>
    <name-node>${nameNode}</name-node>
    <configuration>
        <property>
            <name>mapred.job.queue.name</name>
            <value>${queueName}</value>
        </property>
    </configuration>
    <main-class>MyDateRangeWithOozie</main-class>
    <arg>${sqoopTargetDir}</arg>
    <arg>${intervalHours}</arg>
    <arg>${startTime}</arg>
    <capture-output/>
</java>
<ok to="check-output"/>
<error to="fail"/>
<switch><case to="end">${wf:actionData('java-node')['end'] eq '2016-03-11 12:00:00'}</case><default to="fail-output"/></switch>
<message>Java failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
<message>start [${wf:actionData('java-node')['start']}]; end [${wf:actionData('java-node')['end']}];</message>
<a name="AKUFc"></a>
### job.properties

```properties
nameNode=hdfs://192.168.32.130:8020
jobTracker=192.168.32.130:8032
queueName=default
examplesRoot=oozie-apps
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/java-main-capture
sqoopTargetDir=/user/hive/warehouse/oracle.db/h_log_lotbase
intervalHours=12
startTime=2016-3-11 00:00:00
```
运行测试
编译打jar包,运行测试:
```bash
# 独立(本地)模式安装的Hadoop目录
$ export HADOOP_HOME=~/Documents/hadoop/
# 查找编译时需要用到的class文件,添加到CLASSPATH
$ export CLASSPATH=$CLASSPATH:$HADOOP_HOME/share/hadoop/common/hadoop-common-2.5.0-cdh5.3.6.jar
$ cd oozie-apps/java-main-capture/lib/
$ javac MyDateRangeWithOozie.java
$ jar -cvf MyDateRangeWithOozie.jar MyDateRangeWithOozie.class
added manifest
adding: MyDateRangeWithOozie.class(in = 3059) (out= 1646)(deflated 46%)
$ cd ../../../
$ ~/Documents/hadoop/bin/hadoop fs -rm -r -f oozie-apps/java-main-capture/
$ ~/Documents/hadoop/bin/hadoop fs -put -f oozie-apps/java-main-capture/ oozie-apps
$ export OOZIE_URL="http://192.168.32.130:11000/oozie"
$ bin/oozie job -config oozie-apps/java-main-capture/job.properties -run -verbose -debug
job: 0000000-200518015706412-oozie-jack-W
$ bin/oozie job -info 0000000-200518015706412-oozie-jack-W
```
sqoop action
job.properties
添加下面一行
oozie.use.system.libpath=true
workflow.xml
更改action节点,增加sqoop任务。与前面比较:
删去decision action与kill action
<workflow-app xmlns="uri:oozie:workflow:0.2" name="java-main-wf">
    <start to="java-node"/>
    <action name="java-node">
        <java>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <main-class>MyDateRangeWithOozie</main-class>
            <arg>${sqoopTargetDir}</arg>
            <arg>${intervalHours}</arg>
            <arg>${startTime}</arg>
            <capture-output/>
        </java>
        <ok to="sqoop-freeform-node"/>
        <error to="fail"/>
    </action>
    <action name="sqoop-freeform-node">
        <sqoop xmlns="uri:oozie:sqoop-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                <mkdir path="${sqoopTargetDir}"/>
            </prepare>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <arg>import</arg>
            <arg>--connect</arg>
            <arg>jdbc:oracle:thin:@//192.168.1.38:1521/CMASPROD</arg>
            <arg>--username</arg>
            <arg>mes_bc</arg>
            <arg>--password-file</arg>
            <arg>38oracle.pwd</arg>
            <arg>--target-dir</arg>
            <arg>${sqoopTargetDir}</arg>
            <arg>--mysql-delimiters</arg>
            <arg>--table</arg>
            <arg>H_LOG_LOTBASE</arg>
            <arg>--where</arg>
            <arg><![CDATA[ TIME_LOGGED >= to_date('${wf:actionData("java-node")["start"]}', 'yyyy-mm-dd HH24:MI:SS') and TIME_LOGGED < to_date('${wf:actionData("java-node")["end"]}', 'yyyy-mm-dd HH24:MI:SS') ]]></arg>
            <arg>--append</arg>
            <arg>-m</arg>
            <arg>1</arg>
            <archive>lib/ojdbc7.jar</archive>
        </sqoop>
        <ok to="end"/>
        <error to="fail"/>
    </action>
    <kill name="fail">
        <message>Workflow failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>

运行测试
```bash
$ cd ~/Documents/sqoop
$ echo -n "mes_bc_real" > 38oracle.pwd
$ ~/Documents/hadoop/bin/hadoop fs -put 38oracle.pwd
```
$ cd ~/Documents/oozie
拷贝jdbc驱动
$ cp ~/Documents/sqoop/lib/ojdbc7.jar oozie-apps/java-main-capture/lib/
$ ~/Documents/hadoop/bin/hadoop fs -rm -r -f oozie-apps/java-main-capture/
$ ~/Documents/hadoop/bin/hadoop fs -put -f oozie-apps/java-main-capture/ oozie-apps
$ export OOZIE_URL="http://192.168.32.130:11000/oozie"
第一次导入,数据<2016-3-11 00:00:00, 2016-3-11 12:00:00>
$ bin/oozie job -config oozie-apps/java-main-capture/job.properties -run -verbose -debug job: 0000001-200518015706412-oozie-jack-W $ bin/oozie job -info 0000001-200518015706412-oozie-jack-W
第二次导入,数据<2016-3-11 12:00:00, 2016-3-12 00:00:00>
$ bin/oozie job -config oozie-apps/java-main-capture/job.properties -run -verbose -debug
列出文件
$ ~/Documents/hadoop/bin/hadoop fs -ls /user/sqoop/oracle/h_log_lotbase
Found 2 items
-rw-r--r--   1 jack supergroup     723872 2020-05-19 18:36 /user/sqoop/oracle/h_log_lotbase/part-m-00000
-rw-r--r--   1 jack supergroup     695022 2020-05-19 18:43 /user/sqoop/oracle/h_log_lotbase/part-m-00001
查看head
$ ~/Documents/hadoop/bin/hadoop fs -cat /user/sqoop/oracle/h_log_lotbase/part-m-00000 | head a1e8dc24-40a0-4536-aeea-918890b7130a,2016-03-11 08:58:26.0,COMPLETE,11,3,2016,11,null,null,L216-1603080205,L216-1603080205,2310216030370,8,2016-03-11 08:58:26.0,A1301005,MACPRO_NORMAL_01,ROUTE_SET,ROUTE_SET-B01-01,FALSE,216-1600298,null,8,8,0,0,MACPRO_NORMAL_01,MACPRO_NORMAL_01,ROUTE_SET,PROGRAM,INITIAL,WAIT,null,2016-03-11 08:58:26.0,2016-03-11 08:58:26.0,null,RELEASED,ACTIVE,PCS,2016-03-13 00:00:00.0,null,null,null,null,null,null,null,null ··· ··· $ ~/Documents/hadoop/bin/hadoop fs -cat /user/sqoop/oracle/h_log_lotbase/part-m-00001 | head 0b9e1276-dacf-49f4-b999-b7330c1a1125,2016-03-11 13:00:18.0,COMPLETE,11,3,2016,11,null,null,L216-1602260340,L216-1602260340,2310215121893,2,2016-03-11 13:00:18.0,A1504063,MACPRO_NORMAL_01,MACPRC,CNC-A01-08,FALSE,216-1600235,null,2,2,0,0,MACPRO_NORMAL_01,MACPRO_NORMAL_01,MACPRC,MACPRC,IN_PROCESS,WAIT,null,2016-03-11 11:05:44.0,2016-03-11 13:00:18.0,null,RELEASED,ACTIVE,PCS,2016-03-15 00:00:00.0,null,null,null,null,null,null,null,null ··· ···
<a name="gdIyR"></a>
## hive操作
```bash
$ cd ~/Documents/hive/
$ bin/hive --service metastore
$ bin/hive
hive> use oracle;
hive> show tables;
# 查看导入的数据文件
hive> dfs -ls /user/hive/warehouse/oracle.db/h_log_lotbase;
# 创建(外部)表
hive> create external table h_log_lotbase(
log_id string,
time_logged string,
transaction_name string,
calendar_day string,
calendar_month string,
calendar_year string,
calendar_week string,
calendar_shift string,
comment_text string,
lot_id string,
sublot_id string,
part_no string,
qty double,
lm_time string,
lm_user string,
route_id string,
op_id string,
station_id string,
undone string,
wo_id_mo string,
ticket_id string,
output_qty double,
output_good_qty double,
output_ng_qty double,
scrapt_qty double,
from_route_id string,
to_route_id string,
from_op_id string,
to_op_id string,
from_work_state string,
to_work_state string,
pre_complete_time string,
start_time string,
complete_time string,
line_id string,
hold_status string,
quarantine_status string,
uom string,
due_date string,
stockin_ticket string,
spot_check_qty double,
fetch_qty double,
rou_oper_code string,
rou_oper_name string,
rou_oper_seq string,
session_id string,
scrapt_sublot string
)
row format delimited
fields terminated by ','
lines terminated by '\n';
hive> show tables;
# 导入数据文件
hive> load data inpath '/user/hive/warehouse/oracle.db/h_log_lotbase' into table h_log_lotbase;
# load data inpath '/user/sqoop/oracle/h_log_lotbase' into table h_log_lotbase;
# 分析数据
hive>
# 前10个(距今最近时间段) time_logged
select time_logged from h_log_lotbase order by time_logged desc limit 2;
2016-03-11 22:47:25.0
2016-03-11 22:38:50.0
# 前10个(距今最远时间段) time_logged
select time_logged from h_log_lotbase order by time_logged limit 2;
2016-03-11 08:35:24.0
2016-03-11 08:35:24.0
#
sqoop创建表结构
$ bin/sqoop create-hive-table \
--connect jdbc:oracle:thin:@//192.168.1.38:1521/CMASPROD \
--username mes_bc \
--password-file 38oracle.pwd \
--mysql-delimiters \
--table H_LOG_LOTBASE
