查询mysql数据,导入hdfs路径
# Import MySQL rows into an HDFS path (query mode), lzo-compressed.
# NOTE: generic Hadoop args (-D...) must come immediately after "sqoop import",
# before any tool-specific options — placing -D after --password is rejected.
# NOTE(review): password on the command line leaks via `ps`; prefer --password-file.
sqoop import \
  -Dmapreduce.job.queuename=hive \
  --connect jdbc:mysql://10.10.168.20:3306/blued \
  --username root \
  --password ucloudlyc40666 \
  --query "select * from users_3rd_orders where from_unixtime(pay_time,'%Y%m%d') = ${DAY} and status = 1 and \$CONDITIONS" \
  --target-dir /data/hive-temp/ods_users_3rd_orders \
  --delete-target-dir \
  --fields-terminated-by "," \
  --lines-terminated-by "\n" \
  --null-string '\\N' \
  --null-non-string '\\N' \
  --hive-drop-import-delims \
  --compress \
  --compression-codec lzop \
  --split-by pay_time \
  --num-mappers 1
# --null-string / --null-non-string: map SQL NULL (string / non-string columns)
#   to Hive's on-disk \N marker.
# --hive-drop-import-delims: strip field/line delimiters embedded in column
#   values (replaced with '').
# --compression-codec lzop: output is lzo-compressed; an lzo index must be
#   built afterwards so the files are splittable.
# --num-mappers 1 (short form -m): map task count, default 4. With 1 mapper
#   --split-by is optional; with >1 it is required.

# Build the lzo index for the imported files.
hadoop jar /opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-lzo-0.4.20.jar \
  com.hadoop.compression.lzo.DistributedLzoIndexer \
  /data/hive-temp/ods_users_3rd_orders
加上?tinyInt1isBit=false 就不会把mysql的tinyint转换成boolean值
# Full import of a MySQL table into an existing Hive table.
# The JDBC URL is quoted so the shell does not glob the '?'.
# NOTE(review): password on the command line leaks via `ps`; prefer --password-file.
sqoop import \
  --connect "jdbc:mysql://10.10.86.5:3306/blued?tinyInt1isBit=false" \
  --username root \
  --password ucloudlyc40666 \
  --table users_exchange \
  --target-dir /data/hive-temp/users_exchange \
  --delete-target-dir \
  --hive-database blued_log \
  --hive-table marvin_test4 \
  --hive-import \
  --hive-overwrite \
  --fields-terminated-by "," \
  --lines-terminated-by "\n" \
  --null-string '\\N' \
  --null-non-string '\\N' \
  --hive-drop-import-delims \
  --split-by id \
  -m 1
# --null-string / --null-non-string: map SQL NULL (string / non-string columns)
#   to Hive's on-disk \N marker.
# --hive-drop-import-delims: strip embedded field/line delimiters (replaced
#   with '').
# -m 1: mapper count must be a positive integer ("-m -1" is invalid); with a
#   single mapper --split-by is optional.
自动新建hive表 Mysql到hive表全量导入
# Full import of a MySQL table into Hive, auto-creating the Hive table.
# NOTE(review): password on the command line leaks via `ps`; prefer --password-file.
sqoop import \
  --connect jdbc:mysql://10.10.86.5:3306/blued \
  --username root \
  --password ucloudlyc40666 \
  --table users_exchange \
  --target-dir /data/hive-temp/users_exchange \
  --delete-target-dir \
  --hive-database blued_log \
  --hive-table marvin_test4 \
  --hive-import \
  --hive-overwrite \
  --create-hive-table \
  --fields-terminated-by "," \
  --lines-terminated-by "\n" \
  --null-string '\\N' \
  --null-non-string '\\N' \
  --hive-drop-import-delims \
  --split-by id \
  -m 1
# --create-hive-table: remove this line to import into a pre-existing table
#   instead of creating one.
# --null-string / --null-non-string: map SQL NULL (string / non-string columns)
#   to Hive's on-disk \N marker.
# --hive-drop-import-delims: strip embedded field/line delimiters (replaced
#   with '').
# -m 1: mapper count must be a positive integer ("-m -1" is invalid); with a
#   single mapper --split-by is optional.
查询mysql数据 导入hive分区
# Query-mode import of MySQL rows into a specific Hive partition (day=${DAY}).
# NOTE: the original had a smart quote in --lines-terminated-by ("\n”"), which
# would inject a literal ” character into the line delimiter — fixed to "\n".
# NOTE(review): password on the command line leaks via `ps`; prefer --password-file.
sqoop import \
  --connect jdbc:mysql://10.10.168.20:3306/blued \
  --username root \
  --password ucloudlyc40666 \
  --query "select * from users_3rd_orders where from_unixtime(pay_time,'%Y%m%d') = ${DAY} and status = 1 and \$CONDITIONS" \
  --target-dir /data/hive-temp/ods_users_3rd_orders \
  --delete-target-dir \
  --hive-database blued_log \
  --hive-table ods_users_3rd_orders \
  --hive-partition-key day \
  --hive-partition-value "${DAY}" \
  --hive-import \
  --hive-overwrite \
  --fields-terminated-by "," \
  --lines-terminated-by "\n" \
  --null-string '\\N' \
  --null-non-string '\\N' \
  --hive-drop-import-delims \
  --compress \
  --compression-codec lzop \
  --split-by pay_time \
  -m 1
# --null-string / --null-non-string: map SQL NULL (string / non-string columns)
#   to Hive's on-disk \N marker.
# --hive-drop-import-delims: strip embedded field/line delimiters (replaced
#   with '').
# --compression-codec lzop: output is lzo-compressed; build an lzo index
#   afterwards so the files are splittable (see the DistributedLzoIndexer step).
# -m 1: mapper count must be a positive integer ("-m -1" is invalid); with a
#   single mapper --split-by is optional.