DataX—MySQL至Hive
相关文档
Hive中建表
create table pointasssit.ods_aircraftstandplacemonitnodeplacerelations(
`id` bigint,
`creationtime` string,
`creatoruserid` bigint,
`lastmodificationtime` string,
`lastmodifieruserid` bigint,
`aircraftstandplaceid` bigint,
`moninodeplaceid` bigint
)
row format delimited fields terminated by '\t'
stored as textfile
location '/dtInsight/hive/warehouse/pointasssit.db/ods_aircraftstandplacemonitnodeplacerelations';
说明:若不存在数据库pointasssit,需要先创建:create database pointasssit
DataX配置文件
MySQL Reader
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "Pass2017",
"column": ["*"],
"splitPk": "Id",
"connection": [
{
"table": [
"AircraftStandPlaceMonitNodePlaceRelations"
],
"jdbcUrl": [
"jdbc:mysql://172.26.1.234:3306/PointAssSIT"
]
}
]
}
}
HDFS Writer
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://hd02:9000",
"fileType": "text",
"path": "/dtInsight/hive/warehouse/pointasssit.db/ods_aircraftstandplacemonitnodeplacerelations",
"fileName": "aircraftstandplacemonitnodeplacerelations",
"column": [
{
"name": "id",
"type": "BIGINT"
},
{
"name": "creationtime",
"type": "STRING"
},
{
"name": "creatoruserid",
"type": "BIGINT"
},
{
"name": "lastmodificationtime",
"type": "STRING"
},
{
"name": "lastmodifieruserid",
"type": "BIGINT"
},
{
"name": "aircraftstandplaceid",
"type": "BIGINT"
},
{
"name": "moninodeplaceid",
"type": "BIGINT"
}
],
"writeMode": "append",
"fieldDelimiter": "\t"
}
}
- defaultFS: 需要确认究竟是9000还是其他端口
- fileType: 有两种选择text和orc,要和建表语句匹配
- path: 和建表语句匹配
- fileName: 最后生成的文件名称会随机追加字符串
- column:要一列一列准确无误的写完全(有点坑爹,太累了)
- writeMode:追加模式
- fieldDelimiter:要和建表语句中的
delimited fields terminated
相匹配
完整的json配置文件
{
"setting": {},
"job": {
"setting": {
"speed": {
"channel": 3
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "root",
"password": "Pass2017",
"column": ["*"],
"splitPk": "Id",
"connection": [
{
"table": [
"AircraftStandPlaceMonitNodePlaceRelations"
],
"jdbcUrl": [
"jdbc:mysql://172.26.1.234:3306/PointAssSIT"
]
}
]
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://hd02:9000",
"fileType": "text",
"path": "/dtInsight/hive/warehouse/pointasssit.db/ods_aircraftstandplacemonitnodeplacerelations",
"fileName": "aircraftstandplacemonitnodeplacerelations",
"column": [
{
"name": "id",
"type": "BIGINT"
},
{
"name": "creationtime",
"type": "STRING"
},
{
"name": "creatoruserid",
"type": "BIGINT"
},
{
"name": "lastmodificationtime",
"type": "STRING"
},
{
"name": "lastmodifieruserid",
"type": "BIGINT"
},
{
"name": "aircraftstandplaceid",
"type": "BIGINT"
},
{
"name": "moninodeplaceid",
"type": "BIGINT"
}
],
"writeMode": "append",
"fieldDelimiter": "\t"
}
}
}
]
}
}
运行任务
python bin/datax.py jobs/test.json