运行环境

  • hive metastore
  • oozie bootstrap

    oozie自带示例

    ```bash
    $ cd oozie/
    $ cp -r examples/apps/hive/ oozie-apps/
    $ ls oozie-apps/hive/
    job.properties script.q workflow.xml
    ```
  1. <a name="OEap9"></a>
  2. ## job.properties

nameNode=hdfs://192.168.32.130:8020
jobTracker=192.168.32.130:8032
queueName=default
examplesRoot=oozie-apps

oozie.use.system.libpath=true

oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/hive

  1. <a name="LOfqg"></a>
  2. ## workflow.xml
  3. 注意:
  4. - 输出、输入目录
  5. - <br />
  6. ```xml
  7. <workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-wf">
  8. <start to="hive-node"/>
  9. <action name="hive-node">
  10. <hive xmlns="uri:oozie:hive-action:0.2">
  11. <job-tracker>${jobTracker}</job-tracker>
  12. <name-node>${nameNode}</name-node>
  13. <prepare>
  14. <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive"/>
  15. <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data"/>
  16. </prepare>
  17. <configuration>
  18. <property>
  19. <name>mapred.job.queue.name</name>
  20. <value>${queueName}</value>
  21. </property>
  22. </configuration>
  23. <script>script.q</script>
  24. <param>INPUT=/user/${wf:user()}/${examplesRoot}/input-data/table</param>
  25. <param>OUTPUT=/user/${wf:user()}/${examplesRoot}/output-data/hive</param>
  26. </hive>
  27. <ok to="end"/>
  28. <error to="fail"/>
  29. </action>
  30. <kill name="fail">
  31. <message>Hive failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  32. </kill>
  33. <end name="end"/>
  34. </workflow-app>

script.q

  • 后缀名“.q”、“.hql”等
  • 该脚本创建外部表 test,存储格式为 textfile,数据加载位置为“${INPUT}”;
    并将查询结果(覆盖)输出到目录“${OUTPUT}”;
    1. CREATE EXTERNAL TABLE test (a INT) STORED AS TEXTFILE LOCATION '${INPUT}';
    2. INSERT OVERWRITE DIRECTORY '${OUTPUT}' SELECT * FROM test WHERE a > 2;
    3. -- original
    4. -- INSERT OVERWRITE DIRECTORY '${OUTPUT}' SELECT * FROM test;
    以上已显示与原始示例的区别:修改后的查询语句为“SELECT * FROM test WHERE a > 2”。

    执行测试

    准备并上传测试数据
    1. $ mkdir -p oozie-apps/input-data/table
    2. $ echo 1 > oozie-apps/input-data/table/part-m-00000
    3. $ echo 2 >> oozie-apps/input-data/table/part-m-00000
    4. $ echo 3 >> oozie-apps/input-data/table/part-m-00000
    5. $ echo 4 >> oozie-apps/input-data/table/part-m-00000
    6. $ ~/Documents/hadoop/bin/hadoop fs -put -f oozie-apps/input-data/table oozie-apps/input-data/
    上传oozie任务并运行测试
    1. $ ~/Documents/hadoop/bin/hadoop fs -put -f oozie-apps/hive oozie-apps
    2. $ export OOZIE_URL="http://192.168.32.130:11000/oozie"
    3. $ bin/oozie job -config oozie-apps/hive/job.properties -run -verbose -debug
    4. $ bin/oozie job -info 0000068-200513181559469-oozie-jack-W
    5. # 查看结果
    6. $ ~/Documents/hadoop/bin/hadoop fs -cat /user/jack/oozie-apps/output-data/hive/*
    7. 3
    8. 4

    自定义hive表user分析

    1. $ cd oozie/
    2. $ cp -r oozie-apps/hive oozie-apps/hive-user
    3. $ ls oozie-apps/hive-user
    4. job.properties script.q workflow.xml

    job.properties

    ```
    nameNode=hdfs://192.168.32.130:8020
    jobTracker=192.168.32.130:8032
    queueName=default
    examplesRoot=oozie-apps

oozie.use.system.libpath=true

oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/hive-user

  1. <a name="mNIdv"></a>
  2. ## script.q
  3. ```sql
  4. -- hive> desc user;
  5. -- OK
  6. -- id int
  7. -- name string
  8. -- 查询所有name长度大于4的记录
  9. INSERT OVERWRITE DIRECTORY '${OUTPUT}' SELECT id FROM default.user WHERE length(name) > 4;

测试(修改 ${OUTPUT} 进行测试)

  1. $ cp ~/Documents/oozie/oozie-apps/hive-user/script.q ~/Documents/hive/hql
  2. # ~/Documents/hive/bin/hive -e \
  3. # " \
  4. # INSERT OVERWRITE DIRECTORY '/user/jack/oozie-apps/output-data/hive-user' SELECT id FROM default.user WHERE length(name) > 4; \
  5. # "
  6. # ${OUTPUT} -> /user/jack/oozie-apps/output-data/hive-user
  7. $ ~/Documents/hive/bin/hive -f ~/Documents/hive/hql/script.q
  8. $ ~/Documents/hive/bin/hive
  9. hive> select * from user;
  10. OK
  11. 1 jack
  12. 2 tom
  13. 3 white
  14. 4 black
  15. 5 ming
  16. 6 ning
  17. 7 zhangsan
  18. 8 lisi
  19. 9 wangwu
  20. 10 zhaoliu
  21. 11 aaaaa
  22. 12 bbbbb

workflow.xml

  1. <workflow-app xmlns="uri:oozie:workflow:0.2" name="hive-wf">
  2. <start to="hive-node"/>
  3. <action name="hive-node">
  4. <hive xmlns="uri:oozie:hive-action:0.2">
  5. <job-tracker>${jobTracker}</job-tracker>
  6. <name-node>${nameNode}</name-node>
  7. <prepare>
  8. <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/hive-user"/>
  9. <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data"/>
  10. </prepare>
  11. <configuration>
  12. <property>
  13. <name>mapred.job.queue.name</name>
  14. <value>${queueName}</value>
  15. </property>
  16. <property>
  17. <name>hive.metastore.uris</name>
  18. <value>thrift://192.168.32.130:9083</value>
  19. </property>
  20. </configuration>
  21. <script>script.q</script>
  22. <param>OUTPUT=/user/${wf:user()}/${examplesRoot}/output-data/hive-user</param>
  23. </hive>
  24. <ok to="end"/>
  25. <error to="fail"/>
  26. </action>
  27. <kill name="fail">
  28. <message>Hive failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  29. </kill>
  30. <end name="end"/>
  31. </workflow-app>

执行测试

上传oozie任务并运行测试

  1. $ ~/Documents/hadoop/bin/hadoop fs -put -f oozie-apps/hive-user oozie-apps
  2. $ export OOZIE_URL="http://192.168.32.130:11000/oozie"
  3. $ bin/oozie job -config oozie-apps/hive-user/job.properties -run -verbose -debug
  4. $ bin/oozie job -info 0000101-200513181559469-oozie-jack-W
  5. Job ID : 0000101-200513181559469-oozie-jack-W
  6. ------------------------------------------------------------------------------------------------------------------------------------
  7. Workflow Name : hive-wf
  8. App Path : hdfs://192.168.32.130:8020/user/jack/oozie-apps/hive-user
  9. Status : SUCCEEDED
  10. Run : 0
  11. User : jack
  12. Group : -
  13. Created : 2020-05-18 06:16 GMT
  14. Started : 2020-05-18 06:16 GMT
  15. Last Modified : 2020-05-18 06:17 GMT
  16. Ended : 2020-05-18 06:17 GMT
  17. CoordAction ID: -
  18. Actions
  19. ------------------------------------------------------------------------------------------------------------------------------------
  20. ID Status Ext ID Ext Status Err Code
  21. ------------------------------------------------------------------------------------------------------------------------------------
  22. 0000101-200513181559469-oozie-jack-W@:start: OK - OK -
  23. ------------------------------------------------------------------------------------------------------------------------------------
  24. 0000101-200513181559469-oozie-jack-W@hive-node OK job_1588552715711_0206 SUCCEEDED -
  25. ------------------------------------------------------------------------------------------------------------------------------------
  26. 0000101-200513181559469-oozie-jack-W@end OK - OK -
  27. ------------------------------------------------------------------------------------------------------------------------------------
  28. # 查看结果
  29. $ ~/Documents/hadoop/bin/hadoop fs -cat /user/jack/oozie-apps/output-data/hive-user/*
  30. 3
  31. 4
  32. 7
  33. 9
  34. 10
  35. 11
  36. 12
  37. 13
  38. 14