sqoop导入hsqldb数据示例

  1. $ cp -r examples/apps/sqoop* oozie-apps/
  2. $ ls oozie-apps/sqoop*
  3. oozie-apps/sqoop:
  4. db.hsqldb.properties db.hsqldb.script job.properties workflow.xml
  5. oozie-apps/sqoop-freeform:
  6. db.hsqldb.properties db.hsqldb.script job.properties workflow.xml

使用sqoop-freeform示例执行。

job.properties

  1. nameNode=hdfs://192.168.32.130:8020
  2. jobTracker=192.168.32.130:8032
  3. queueName=default
  4. examplesRoot=oozie-apps
  5. oozie.use.system.libpath=true
  6. oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/sqoop-freeform

workflow.xml

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!--
  3. Licensed to the Apache Software Foundation (ASF) under one
  4. or more contributor license agreements. See the NOTICE file
  5. distributed with this work for additional information
  6. regarding copyright ownership. The ASF licenses this file
  7. to you under the Apache License, Version 2.0 (the
  8. "License"); you may not use this file except in compliance
  9. with the License. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. -->
  17. <workflow-app xmlns="uri:oozie:workflow:0.2" name="sqoop-freeform-wf">
  18. <start to="sqoop-freeform-node"/>
  19. <action name="sqoop-freeform-node">
  20. <sqoop xmlns="uri:oozie:sqoop-action:0.2">
  21. <job-tracker>${jobTracker}</job-tracker>
  22. <name-node>${nameNode}</name-node>
  23. <prepare>
  24. <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/sqoop-freeform"/>
  25. <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data"/>
  26. </prepare>
  27. <configuration>
  28. <property>
  29. <name>mapred.job.queue.name</name>
  30. <value>${queueName}</value>
  31. </property>
  32. </configuration>
  33. <arg>import</arg>
  34. <arg>--connect</arg>
  35. <arg>jdbc:hsqldb:file:db.hsqldb</arg>
  36. <arg>--username</arg>
  37. <arg>sa</arg>
  38. <arg>--password</arg>
  39. <arg></arg>
  40. <arg>--verbose</arg>
  41. <arg>--query</arg>
  42. <arg>select TT.I, TT.S from TT where $CONDITIONS</arg>
  43. <arg>--target-dir</arg>
  44. <arg>/user/${wf:user()}/${examplesRoot}/output-data/sqoop-freeform</arg>
  45. <arg>-m</arg>
  46. <arg>1</arg>
  47. <file>db.hsqldb.properties#db.hsqldb.properties</file>
  48. <file>db.hsqldb.script#db.hsqldb.script</file>
  49. </sqoop>
  50. <ok to="end"/>
  51. <error to="fail"/>
  52. </action>
  53. <kill name="fail">
  54. <message>Sqoop free form failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  55. </kill>
  56. <end name="end"/>
  57. </workflow-app>

db.hsqldb.properties

  1. #HSQL Database Engine 1.8.0.5
  2. #Tue Oct 05 11:20:19 SGT 2010
  3. hsqldb.script_format=0
  4. runtime.gc_interval=0
  5. sql.enforce_strict_size=false
  6. hsqldb.cache_size_scale=8
  7. readonly=false
  8. hsqldb.nio_data_file=true
  9. hsqldb.cache_scale=14
  10. version=1.8.0
  11. hsqldb.default_table_type=memory
  12. hsqldb.cache_file_scale=1
  13. hsqldb.log_size=200
  14. modified=no
  15. hsqldb.cache_version=1.7.0
  16. hsqldb.original_version=1.8.0
  17. hsqldb.compatible_version=1.8.0

db.hsqldb.script

  1. CREATE SCHEMA PUBLIC AUTHORIZATION DBA
  2. CREATE MEMORY TABLE TT(I INTEGER NOT NULL PRIMARY KEY,S VARCHAR(256))
  3. CREATE USER SA PASSWORD ""
  4. GRANT DBA TO SA
  5. SET WRITE_DELAY 10
  6. SET SCHEMA PUBLIC
  7. INSERT INTO TT VALUES(1,'a')
  8. INSERT INTO TT VALUES(2,'a')
  9. INSERT INTO TT VALUES(3,'a')

执行

  1. $ # 上传任务
  2. $ ~/Documents/hadoop/bin/hadoop fs -put oozie-apps/sqoop-freeform/ oozie-apps
  3. $ # 查看目录信息
  4. $ ~/Documents/hadoop/bin/hadoop fs -ls /user/jack/oozie-apps/sqoop-freeform
  5. Found 2 items
  6. Found 4 items
  7. -rw-r--r-- 1 jack supergroup 1208 2020-05-13 18:12 /user/jack/oozie-apps/sqoop-freeform/db.hsqldb.properties
  8. -rw-r--r-- 1 jack supergroup 1081 2020-05-13 18:12 /user/jack/oozie-apps/sqoop-freeform/db.hsqldb.script
  9. -rw-r--r-- 1 jack supergroup 1018 2020-05-13 18:12 /user/jack/oozie-apps/sqoop-freeform/job.properties
  10. -rw-r--r-- 1 jack supergroup 2508 2020-05-13 18:12 /user/jack/oozie-apps/sqoop-freeform/workflow.xml
  11. $ # 执行工作流任务
  12. $ bin/oozied.sh start
  13. $ export OOZIE_URL="http://192.168.32.130:11000/oozie"
  14. $ bin/oozie job -config oozie-apps/sqoop-freeform/job.properties -run
  15. job: 0000000-200513181559469-oozie-jack-W
  16. $ # 检查工作流任务状态,也可以通过Oozie web控制台检查工作流作业状态,http://localhost:11000/oozie
  17. $ bin/oozie job -info 0000000-200513181559469-oozie-jack-W
  18. Job ID : 0000000-200513181559469-oozie-jack-W
  19. ------------------------------------------------------------------------------------------------------------------------------------
  20. Workflow Name : sqoop-freeform-wf
  21. App Path : hdfs://192.168.32.130:8020/user/jack/oozie-apps/sqoop-freeform
  22. Status : RUNNING
  23. Run : 0
  24. User : jack
  25. Group : -
  26. Created : 2020-05-14 01:17 GMT
  27. Started : 2020-05-14 01:17 GMT
  28. Last Modified : 2020-05-14 01:17 GMT
  29. Ended : -
  30. CoordAction ID: -
  31. Actions
  32. ------------------------------------------------------------------------------------------------------------------------------------
  33. ID Status Ext ID Ext Status Err Code
  34. ------------------------------------------------------------------------------------------------------------------------------------
  35. 0000000-200513181559469-oozie-jack-W@:start: OK - OK -
  36. ------------------------------------------------------------------------------------------------------------------------------------
  37. 0000000-200513181559469-oozie-jack-W@sqoop-freeform-node RUNNING job_1588552715711_0025 RUNNING -
  38. ------------------------------------------------------------------------------------------------------------------------------------
  39. ··· ···
  40. Actions
  41. ------------------------------------------------------------------------------------------------------------------------------------
  42. ID Status Ext ID Ext Status Err Code
  43. ------------------------------------------------------------------------------------------------------------------------------------
  44. 0000000-200513181559469-oozie-jack-W@:start: OK - OK -
  45. ------------------------------------------------------------------------------------------------------------------------------------
  46. 0000000-200513181559469-oozie-jack-W@sqoop-freeform-node OK job_1588552715711_0025 SUCCEEDED -
  47. ------------------------------------------------------------------------------------------------------------------------------------
  48. 0000000-200513181559469-oozie-jack-W@end OK - OK -
  49. ------------------------------------------------------------------------------------------------------------------------------------
  50. $ # 查看输出内容
  51. $ ~/Documents/hadoop/bin/hadoop fs -ls /user/jack/oozie-apps/output-data/sqoop-freeform
  52. Found 2 items
  53. -rw-r--r-- 1 jack supergroup 0 2020-05-13 18:18 /user/jack/oozie-apps/output-data/sqoop-freeform/_SUCCESS
  54. -rw-r--r-- 1 jack supergroup 12 2020-05-13 18:18 /user/jack/oozie-apps/output-data/sqoop-freeform/part-m-00000
  55. $ ~/Documents/hadoop/bin/hadoop fs -cat /user/jack/oozie-apps/output-data/sqoop-freeform/part-m-00000
  56. 1,a
  57. 2,a
  58. 3,a

sqoop导入oracle数据

  1. $ cp -r oozie-apps/sqoop-freeform/ oozie-apps/sqoop-freeform-oracle
  2. $ rm -rf oozie-apps/sqoop-freeform-oracle/db.hsqldb.*

job.properties

  1. nameNode=hdfs://192.168.32.130:8020
  2. jobTracker=192.168.32.130:8032
  3. queueName=default
  4. examplesRoot=oozie-apps
  5. oozie.use.system.libpath=true
  6. oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/sqoop-freeform-oracle

workflow.xml

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!--
  3. Licensed to the Apache Software Foundation (ASF) under one
  4. or more contributor license agreements. See the NOTICE file
  5. distributed with this work for additional information
  6. regarding copyright ownership. The ASF licenses this file
  7. to you under the Apache License, Version 2.0 (the
  8. "License"); you may not use this file except in compliance
  9. with the License. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. -->
  17. <workflow-app xmlns="uri:oozie:workflow:0.2" name="sqoop-freeform-wf">
  18. <start to="sqoop-freeform-node"/>
  19. <action name="sqoop-freeform-node">
  20. <sqoop xmlns="uri:oozie:sqoop-action:0.2">
  21. <job-tracker>${jobTracker}</job-tracker>
  22. <name-node>${nameNode}</name-node>
  23. <prepare>
  24. <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/sqoop-freeform-oracle/h_log_lotbase"/>
  25. <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/sqoop-freeform-oracle/"/>
  26. </prepare>
  27. <configuration>
  28. <property>
  29. <name>mapred.job.queue.name</name>
  30. <value>${queueName}</value>
  31. </property>
  32. </configuration>
  33. <arg>import</arg>
  34. <arg>--connect</arg>
  35. <arg>jdbc:oracle:thin:@//192.168.1.38:1521/CMASPROD</arg>
  36. <arg>--username</arg>
  37. <arg>mes_bc</arg>
  38. <arg>--password</arg>
  39. <arg>mes_bc_real</arg>
  40. <arg>--table</arg>
  41. <arg>H_LOG_LOTBASE</arg>
  42. <arg>--where</arg>
  43. <arg><![CDATA[complete_time < to_date('2016:3:12 00:00:00', 'yyyy-mm-dd HH24:MI:SS')]]></arg>
  44. <arg>--delete-target-dir</arg>
  45. <arg>--target-dir</arg>
  46. <arg>/user/${wf:user()}/${examplesRoot}/output-data/sqoop-freeform-oracle/h_log_lotbase</arg>
  47. <arg>-m</arg>
  48. <arg>1</arg>
  49. <archive>ojdbc7.jar</archive>
  50. </sqoop>
  51. <ok to="end"/>
  52. <error to="fail"/>
  53. </action>
  54. <kill name="fail">
  55. <message>Sqoop free form failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  56. </kill>
  57. <end name="end"/>
  58. </workflow-app>

执行

  1. $ # 上传任务 -f 覆盖
  2. $ ~/Documents/hadoop/bin/hadoop fs -put oozie-apps/sqoop-freeform-oracle/ oozie-apps
  3. $ # 查看目录信息
  4. $ ~/Documents/hadoop/bin/hadoop fs -ls /user/jack/oozie-apps/sqoop-freeform-oracle
  5. Found 2 items
  6. -rw-r--r-- 1 jack supergroup 1025 2020-05-13 18:56 /user/jack/oozie-apps/sqoop-freeform-oracle/job.properties
  7. -rw-r--r-- 1 jack supergroup 2503 2020-05-13 18:56 /user/jack/oozie-apps/sqoop-freeform-oracle/workflow.xml
  8. $ bin/oozied.sh start
  9. $ export OOZIE_URL="http://192.168.32.130:11000/oozie"
  10. $ bin/oozie job -config oozie-apps/sqoop-freeform-oracle/job.properties -run
  11. job: 0000015-200513181559469-oozie-jack-W
  12. $ bin/oozie job -info 0000015-200513181559469-oozie-jack-W
  13. Job ID : 0000015-200513181559469-oozie-jack-W
  14. ------------------------------------------------------------------------------------------------------------------------------------
  15. Workflow Name : sqoop-freeform-wf
  16. App Path : hdfs://192.168.32.130:8020/user/jack/oozie-apps/sqoop-freeform-oracle
  17. Status : RUNNING
  18. Run : 0
  19. User : jack
  20. Group : -
  21. Created : 2020-05-14 05:23 GMT
  22. Started : 2020-05-14 05:23 GMT
  23. Last Modified : 2020-05-14 05:23 GMT
  24. Ended : -
  25. CoordAction ID: -
  26. Actions
  27. ------------------------------------------------------------------------------------------------------------------------------------
  28. ID Status Ext ID Ext Status Err Code
  29. ------------------------------------------------------------------------------------------------------------------------------------
  30. 0000015-200513181559469-oozie-jack-W@:start: OK - OK -
  31. ------------------------------------------------------------------------------------------------------------------------------------
  32. 0000015-200513181559469-oozie-jack-W@sqoop-freeform-node RUNNING job_1588552715711_0046 RUNNING -
  33. ------------------------------------------------------------------------------------------------------------------------------------
  34. ··· ···
  35. ------------------------------------------------------------------------------------------------------------------------------------
  36. ID Status Ext ID Ext Status Err Code
  37. ------------------------------------------------------------------------------------------------------------------------------------
  38. 0000015-200513181559469-oozie-jack-W@:start: OK - OK -
  39. ------------------------------------------------------------------------------------------------------------------------------------
  40. 0000015-200513181559469-oozie-jack-W@sqoop-freeform-node OK job_1588552715711_0046 SUCCEEDED -
  41. ------------------------------------------------------------------------------------------------------------------------------------
  42. 0000015-200513181559469-oozie-jack-W@end OK - OK -
  43. ------------------------------------------------------------------------------------------------------------------------------------
  44. # 默认逗号分隔field
  45. $ ~/Documents/hadoop/bin/hadoop fs -cat \
  46. oozie-apps/output-data/sqoop-freeform-oracle/h_log_lotbase/*
  47. 914b7ee3-20c0-4289-8ca2-b883dad9b73a,2016-03-11 09:13:22.0,COMPLETE,11,3,2016,11,null,null,L216-1603040030,L216-1603040030,2310215012240,2,2016-03-11 09:13:22.0,A1301005,MACPRO_NORMAL_01,ROUTE_SET,ROUTE_SET-B01-01,FALSE,216-1600289,null,2,2,0,0,MACPRO_NORMAL_01,MACPRO_NORMAL_01,ROUTE_SET,PROGRAM,INITIAL,WAIT,null,2016-03-11 09:13:22.0,2016-03-11 09:13:22.0,null,RELEASED,ACTIVE,PCS,2016-03-21 00:00:00.0,null,null,null,null,null,null,null,null
  48. 5667f1e5-999e-4b6a-9021-653320e28349,2016-03-11 09:13:24.0,COMPLETE,11,3,2016,11,null,null,L216-1603040035,L216-1603040035,2310215082321,2,2016-03-11 09:13:24.0,A1301005,MACPRO_NORMAL_01,ROUTE_SET,ROUTE_SET-B01-01,FALSE,216-1600289,null,2,2,0,0,MACPRO_NORMAL_01,MACPRO_NORMAL_01,ROUTE_SET,PROGRAM,INITIAL,WAIT,null,2016-03-11 09:13:24.0,2016-03-11 09:13:24.0,null,RELEASED,ACTIVE,PCS,2016-03-21 00:00:00.0,null,null,null,null,null,null,null,null

sqoop导入mysql数据

job.properties

  1. nameNode=hdfs://192.168.32.130:8020
  2. jobTracker=192.168.32.130:8032
  3. queueName=default
  4. examplesRoot=oozie-apps
  5. oozie.use.system.libpath=true
  6. oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/sqoop-freeform-mysql

workflow.xml

  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <!--
  3. Licensed to the Apache Software Foundation (ASF) under one
  4. or more contributor license agreements. See the NOTICE file
  5. distributed with this work for additional information
  6. regarding copyright ownership. The ASF licenses this file
  7. to you under the Apache License, Version 2.0 (the
  8. "License"); you may not use this file except in compliance
  9. with the License. You may obtain a copy of the License at
  10. http://www.apache.org/licenses/LICENSE-2.0
  11. Unless required by applicable law or agreed to in writing, software
  12. distributed under the License is distributed on an "AS IS" BASIS,
  13. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. See the License for the specific language governing permissions and
  15. limitations under the License.
  16. -->
  17. <workflow-app xmlns="uri:oozie:workflow:0.2" name="sqoop-freeform-wf">
  18. <start to="sqoop-freeform-node"/>
  19. <action name="sqoop-freeform-node">
  20. <sqoop xmlns="uri:oozie:sqoop-action:0.2">
  21. <job-tracker>${jobTracker}</job-tracker>
  22. <name-node>${nameNode}</name-node>
  23. <prepare>
  24. <delete path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/sqoop-freeform-mysql/roles"/>
  25. <mkdir path="${nameNode}/user/${wf:user()}/${examplesRoot}/output-data/sqoop-freeform-mysql/"/>
  26. </prepare>
  27. <configuration>
  28. <property>
  29. <name>mapred.job.queue.name</name>
  30. <value>${queueName}</value>
  31. </property>
  32. </configuration>
  33. <arg>import</arg>
  34. <arg>--connect</arg>
  35. <arg>jdbc:mysql://192.168.32.130:3306/hive</arg>
  36. <arg>--username</arg>
  37. <arg>root</arg>
  38. <arg>--password</arg>
  39. <arg>123456</arg>
  40. <arg>--table</arg>
  41. <arg>ROLES</arg>
  42. <arg>--delete-target-dir</arg>
  43. <arg>--target-dir</arg>
  44. <arg>/user/${wf:user()}/${examplesRoot}/output-data/sqoop-freeform-mysql/roles</arg>
  45. <arg>-m</arg>
  46. <arg>1</arg>
  47. <archive>mysql-connector-java-5.1.27-bin.jar</archive>
  48. </sqoop>
  49. <ok to="end"/>
  50. <error to="fail"/>
  51. </action>
  52. <kill name="fail">
  53. <message>Sqoop free form failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
  54. </kill>
  55. <end name="end"/>
  56. </workflow-app>

执行

  1. $ bin/oozied.sh start
  2. $ ~/Documents/hadoop/bin/hadoop fs -put -f \
  3. oozie-apps/sqoop-freeform-mysql/ oozie-apps
  4. $ bin/oozie job -config oozie-apps/sqoop-freeform-mysql/job.properties -run
  5. $ bin/oozie job -info 0000013-200513181559469-oozie-jack-W
  6. $ ~/Documents/hadoop/bin/hadoop fs -cat \
  7. /user/jack/oozie-apps/output-data/sqoop-freeform-mysql/roles/*
  8. 20/05/13 22:14:47 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
  9. 1,1585619269,admin,admin
  10. 2,1585619269,public,public

参考

启动Sqoop失败的问题

“Launcher ERROR, reason: Main class [org.apache.oozie.action.hadoop.SqoopMain], exit code [1]”,引发的原因,Sqoop无法加载访问数据库所需的数据库驱动程序。