序列化

把内存中对象转换成字节序列(或其他数据传输协议)以便于存储到磁盘(持久化)和网络传输

反序列化

把接收到的字节序列(或其他传输协议的数据)或者磁盘上的持久化数据转换成内存中的对象

自定义bean对象实现序列化接口(Writable)

具体实现bean对象序列化步骤如下7步。
(1)必须实现Writable接口
(2)反序列化时,需要反射调用空参构造函数,所以必须有空参构造
(3)重写序列化方法
(4)重写反序列化方法
(5)注意反序列化的顺序和序列化的顺序完全一致
(6)要想把结果显示在文件中,需要重写toString(),可用"\t"分开,方便后续使用。
(7)如果需要将自定义的bean放在key中传输,则还需要实现Comparable接口,因为MapReduce框中的Shuffle过程要求对key必须能排序。详见后面排序案例。

hello world

测试数据

  1. 1 13726230503 24681 24681 200
  2. 2 13826544101 264 0 200
  3. 3 13926435656 132 1512 200
  4. 4 13926251106 240 0 200
  5. 5 18211575961 1527 2106 200
  6. 6 18211575961 4116 1432 200
  7. 7 13560439658 1116 954 200
  8. 8 15920133257 3156 2936 200
  9. 9 13719199419 240 0 200
  10. 10 13660577991 6960 690 200
  11. 11 15013685858 3659 3538 200
  12. 12 15989002119 1938 180 200
  13. 13 13560439658 918 4938 200
  14. 14 13480253104 80 180 200
  15. 15 13602846565 1938 2910 200
  16. 16 13922314466 3008 3720 200
  17. 17 13502468823 7335 110349 200
  18. 18 18320173382 9531 2412 200
  19. 19 13925057413 11058 48243 200
  20. 20 13760778710 120 120 200
  21. 21 13560436666 2481 24681 200
  22. 22 13560436666 1116 954 200

hadoop 序列化和反序列化对象

  1. package com.bigdata.mapreduce.flowdata;
  2. import org.apache.hadoop.io.Writable;
  3. import java.io.DataInput;
  4. import java.io.DataOutput;
  5. import java.io.IOException;
  6. public class FlowBean implements Writable {
  7. private long upFlow;
  8. private long downFlow;
  9. private long sumFlow;
  10. public FlowBean() {
  11. }
  12. public String toString() {
  13. return upFlow + "\t" + downFlow + "\t" + sumFlow;
  14. }
  15. public void set(long upFlow, long downFlow) {
  16. this.upFlow = upFlow;
  17. this.downFlow = downFlow;
  18. this.sumFlow = upFlow + downFlow;
  19. }
  20. public long getUpFlow() {
  21. return upFlow;
  22. }
  23. public void setUpFlow(long upFlow) {
  24. this.upFlow = upFlow;
  25. }
  26. public long getDownFlow() {
  27. return downFlow;
  28. }
  29. public void setDownFlow(long downFlow) {
  30. this.downFlow = downFlow;
  31. }
  32. public long getSumFlow() {
  33. return sumFlow;
  34. }
  35. public void setSumFlow(long sumFlow) {
  36. this.sumFlow = sumFlow;
  37. }
  38. // 序列化
  39. public void write(DataOutput dataOutput) throws IOException {
  40. dataOutput.writeLong(upFlow);
  41. dataOutput.writeLong(downFlow);
  42. dataOutput.writeLong(sumFlow);
  43. }
  44. // 反序列 跟序列化顺序一致
  45. public void readFields(DataInput dataInput) throws IOException {
  46. upFlow = dataInput.readLong();
  47. downFlow = dataInput.readLong();
  48. sumFlow = dataInput.readLong();
  49. }
  50. }

mapper

  1. package com.bigdata.mapreduce.flowdata;
  2. import org.apache.hadoop.io.Writable;
  3. import java.io.DataInput;
  4. import java.io.DataOutput;
  5. import java.io.IOException;
  6. public class FlowBean implements Writable {
  7. private long upFlow;
  8. private long downFlow;
  9. private long sumFlow;
  10. public FlowBean() {
  11. }
  12. public String toString() {
  13. return upFlow + "\t" + downFlow + "\t" + sumFlow;
  14. }
  15. public void set(long upFlow, long downFlow) {
  16. this.upFlow = upFlow;
  17. this.downFlow = downFlow;
  18. this.sumFlow = upFlow + downFlow;
  19. }
  20. public long getUpFlow() {
  21. return upFlow;
  22. }
  23. public void setUpFlow(long upFlow) {
  24. this.upFlow = upFlow;
  25. }
  26. public long getDownFlow() {
  27. return downFlow;
  28. }
  29. public void setDownFlow(long downFlow) {
  30. this.downFlow = downFlow;
  31. }
  32. public long getSumFlow() {
  33. return sumFlow;
  34. }
  35. public void setSumFlow(long sumFlow) {
  36. this.sumFlow = sumFlow;
  37. }
  38. // 序列化
  39. public void write(DataOutput dataOutput) throws IOException {
  40. dataOutput.writeLong(upFlow);
  41. dataOutput.writeLong(downFlow);
  42. dataOutput.writeLong(sumFlow);
  43. }
  44. // 反序列 跟序列化顺序一致
  45. public void readFields(DataInput dataInput) throws IOException {
  46. upFlow = dataInput.readLong();
  47. downFlow = dataInput.readLong();
  48. sumFlow = dataInput.readLong();
  49. }
  50. }

reducer

  1. package com.bigdata.mapreduce.flowdata;
  2. import org.apache.hadoop.io.Text;
  3. import org.apache.hadoop.mapreduce.Reducer;
  4. import java.io.IOException;
  5. public class FlowReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
  6. private FlowBean sumFlow = new FlowBean();
  7. @Override
  8. protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
  9. long sumUpFlow = 0;
  10. long sumDownFlow = 0;
  11. for (FlowBean value : values) {
  12. sumUpFlow += value.getUpFlow();
  13. sumDownFlow += value.getDownFlow();
  14. }
  15. sumFlow.set(sumUpFlow, sumDownFlow);
  16. context.write(key, sumFlow);
  17. }
  18. }

driver

  1. package com.bigdata.mapreduce.flowdata;
  2. import org.apache.hadoop.conf.Configuration;
  3. import org.apache.hadoop.fs.Path;
  4. import org.apache.hadoop.io.Text;
  5. import org.apache.hadoop.mapreduce.Job;
  6. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  7. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  8. import java.io.IOException;
  9. public class FlowDriver {
  10. public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  11. Job job = Job.getInstance(new Configuration());
  12. //设置类路径
  13. job.setJarByClass(FlowDriver.class);
  14. job.setMapperClass(FlowMapper.class);
  15. job.setReducerClass(FlowReducer.class);
  16. job.setMapOutputKeyClass(Text.class);
  17. job.setMapOutputValueClass(FlowBean.class);
  18. job.setOutputKeyClass(Text.class);
  19. job.setOutputValueClass(FlowBean.class);
  20. FileInputFormat.setInputPaths(job, new Path(args[0]));
  21. FileOutputFormat.setOutputPath(job, new Path(args[1]));
  22. boolean b = job.waitForCompletion(true);
  23. System.exit(b ? 0 : 1);
  24. }
  25. }

运行结果

13480253104    80    180    260
13502468823    7335    110349    117684
13560436666    3597    25635    29232
13560439658    2034    5892    7926
13602846565    1938    2910    4848
13660577991    6960    690    7650
13719199419    240    0    240
13726230503    24681    24681    49362
13760778710    120    120    240
13826544101    264    0    264
13922314466    3008    3720    6728
13925057413    11058    48243    59301
13926251106    240    0    240
13926435656    132    1512    1644
15013685858    3659    3538    7197
15920133257    3156    2936    6092
15989002119    1938    180    2118
18211575961    5643    3538    9181
18320173382    9531    2412    11943