DataX—MySQL至Hive

相关文档

MySQL Reader插件

HDFS Writer插件

Hive中建表

  1. create table pointasssit.ods_aircraftstandplacemonitnodeplacerelations(
  2. `id` bigint,
  3. `creationtime` string,
  4. `creatoruserid` bigint,
  5. `lastmodificationtime` string,
  6. `lastmodifieruserid` bigint,
  7. `aircraftstandplaceid` bigint,
  8. `moninodeplaceid` bigint
  9. )
  10. row format delimited fields terminated by '\t'
  11. stored as textfile
  12. location '/dtInsight/hive/warehouse/pointasssit.db/ods_aircraftstandplacemonitnodeplacerelations';

说明:若不存在数据库pointasssit,需要先创建:create database pointasssit

DataX配置文件

MySQL Reader

  1. "reader": {
  2. "name": "mysqlreader",
  3. "parameter": {
  4. "username": "root",
  5. "password": "Pass2017",
  6. "column": ["*"],
  7. "splitPk": "Id",
  8. "connection": [
  9. {
  10. "table": [
  11. "AircraftStandPlaceMonitNodePlaceRelations"
  12. ],
  13. "jdbcUrl": [
  14. "jdbc:mysql://172.26.1.234:3306/PointAssSIT"
  15. ]
  16. }
  17. ]
  18. }
  19. }

HDFS Writer

  1. "writer": {
  2. "name": "hdfswriter",
  3. "parameter": {
  4. "defaultFS": "hdfs://hd02:9000",
  5. "fileType": "text",
  6. "path": "/dtInsight/hive/warehouse/pointasssit.db/ods_aircraftstandplacemonitnodeplacerelations",
  7. "fileName": "aircraftstandplacemonitnodeplacerelations",
  8. "column": [
  9. {
  10. "name": "id",
  11. "type": "BIGINT"
  12. },
  13. {
  14. "name": "creationtime",
  15. "type": "STRING"
  16. },
  17. {
  18. "name": "creatoruserid",
  19. "type": "BIGINT"
  20. },
  21. {
  22. "name": "lastmodificationtime",
  23. "type": "STRING"
  24. },
  25. {
  26. "name": "lastmodifieruserid",
  27. "type": "BIGINT"
  28. },
  29. {
  30. "name": "aircraftstandplaceid",
  31. "type": "BIGINT"
  32. },
  33. {
  34. "name": "moninodeplaceid",
  35. "type": "BIGINT"
  36. }
  37. ],
  38. "writeMode": "append",
  39. "fieldDelimiter": "\t"
  40. }
  41. }
  • defaultFS: 需要确认究竟是9000还是其他端口
  • fileType: 有两种选择text和orc,要和建表语句匹配
  • path: 和建表语句匹配
  • fileName: 最后生成的文件名称会随机追加字符串

Datax--MySQL至Hive - 图1

  • column:要一列一列准确无误的写完全(有点坑爹,太累了)
  • writeMode:追加模式
  • fieldDelimiter:要和建表语句中的delimited fields terminated相匹配

完整的json配置文件

  1. {
  2. "setting": {},
  3. "job": {
  4. "setting": {
  5. "speed": {
  6. "channel": 3
  7. }
  8. },
  9. "content": [
  10. {
  11. "reader": {
  12. "name": "mysqlreader",
  13. "parameter": {
  14. "username": "root",
  15. "password": "Pass2017",
  16. "column": ["*"],
  17. "splitPk": "Id",
  18. "connection": [
  19. {
  20. "table": [
  21. "AircraftStandPlaceMonitNodePlaceRelations"
  22. ],
  23. "jdbcUrl": [
  24. "jdbc:mysql://172.26.1.234:3306/PointAssSIT"
  25. ]
  26. }
  27. ]
  28. }
  29. },
  30. "writer": {
  31. "name": "hdfswriter",
  32. "parameter": {
  33. "defaultFS": "hdfs://hd02:9000",
  34. "fileType": "text",
  35. "path": "/dtInsight/hive/warehouse/pointasssit.db/ods_aircraftstandplacemonitnodeplacerelations",
  36. "fileName": "aircraftstandplacemonitnodeplacerelations",
  37. "column": [
  38. {
  39. "name": "id",
  40. "type": "BIGINT"
  41. },
  42. {
  43. "name": "creationtime",
  44. "type": "STRING"
  45. },
  46. {
  47. "name": "creatoruserid",
  48. "type": "BIGINT"
  49. },
  50. {
  51. "name": "lastmodificationtime",
  52. "type": "STRING"
  53. },
  54. {
  55. "name": "lastmodifieruserid",
  56. "type": "BIGINT"
  57. },
  58. {
  59. "name": "aircraftstandplaceid",
  60. "type": "BIGINT"
  61. },
  62. {
  63. "name": "moninodeplaceid",
  64. "type": "BIGINT"
  65. }
  66. ],
  67. "writeMode": "append",
  68. "fieldDelimiter": "\t"
  69. }
  70. }
  71. }
  72. ]
  73. }
  74. }

运行任务

python bin/datax.py jobs/test.json