考试时间:下午15:30~17:40,共计2小时10分钟

考试中

  1. 一个题目,找到答案很容易,但是思考却很困难,不要做急于奔波的人,忙忙碌碌,却忘记了思考!

一、简单题(40分)

1、一个260M的文件,在HDFS上存储,分为几个块?用MapReduce读取,分为几个片?
2、简述HDFS的开机启动流程(非第一次启动)
3、简述YARN的job提交流程
4、简述MapTask做了什么事情,从读取文件到生成MapTask的文件

二、编程题(60分)

1、求共同好友
以下是qq的好友列表数据,冒号前是一个用户,冒号后是该用户的所有好友(数据中的好友关系是单向的)

  1. A:B,C,D,F,E,O
  2. B:A,C,E,K
  3. C:A,B,D,E,I
  4. D:A,E,F,L
  5. E:B,C,D,M,L
  6. F:A,B,C,D,E,O,M
  7. G:A,C,D,E,F
  8. H:A,C,D,E,O
  9. I:A,O
  10. J:B,O
  11. K:A,C,D
  12. L:D,E,F
  13. M:E,F,G
  14. O:A,H,I,J

求出哪些人两两之间有共同好友,及他俩的共同好友都有谁?
A-B C-D-F

  1. 首先此题需要进行两次mapreduce
  2. 首先将数据切割
  3. A:B,C,D,F,E,O
  4. B:A,C,E,K
  5. A的好友是B,C,D,F,E,O
  6. 也就是说B是A的好友,C是A的好友……
  7. 可以将数据变为
  8. B A
  9. C A
  10. D A
  11. F A
  12. E A
  13. O A
  14. A B
  15. C B
  16. E B
  17. K B
  18. 然后根据Key值合并的数据是
  19. B A
  20. C A-B
  21. D A
  22. F A
  23. E A-B
  24. 当然全部的数据也会出现
  25. F A-B-C 的情况
  26. 比如: C A-B 说明 A和B的共同好友是C
  27. F A-B-C A,B,C的共同好友是F
  28. 然后接着进入第二次mapReduce ,文件就是第一次的mapreduce产生的文件。
  29. F A-B-C
  30. 进行切割后,第二个值 A-B-C 再次切割,两两配对,输出即可。
  31. A-B-C F
  32. A-B F
  33. A-C F
  34. B-C F
  35. 根据key进行合并
  36. A-B D-E-F
  37. 表示AB的共同好友是D-E-F

将写好的代码,打包上传,在yarn平台上运行,并在yarn的web界面上截图,并在文档中贴出运行成功的截图。

参考答案:

  1. import org.apache.hadoop.conf.Configuration;
  2. import org.apache.hadoop.fs.Path;
  3. import org.apache.hadoop.io.IntWritable;
  4. import org.apache.hadoop.io.LongWritable;
  5. import org.apache.hadoop.io.Text;
  6. import org.apache.hadoop.mapreduce.Job;
  7. import org.apache.hadoop.mapreduce.Mapper;
  8. import org.apache.hadoop.mapreduce.Reducer;
  9. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  10. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  11. import java.io.IOException;
  12. /**
  13. * @Author laoyan
  14. * @Description TODO
  15. * @Date 2022/4/17 9:53
  16. * @Version 1.0
  17. */
  18. /**
  19. * A:B,C,D,F,E,O
  20. *
  21. * B A
  22. * C A
  23. * D A
  24. * F A
  25. * E A
  26. *
  27. */
  28. class QQMapper01 extends Mapper<LongWritable, Text,Text,Text>{
  29. private Text v2;
  30. private Text k2;
  31. @Override
  32. protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  33. k2 =new Text();
  34. v2 = new Text();
  35. }
  36. @Override
  37. protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  38. String line = value.toString();
  39. String[] split = line.split(":");
  40. String val = split[0];
  41. String[] arr = split[1].split(","); //B,C,D,F,E,O
  42. for (String s:arr ) {
  43. k2.set(s);
  44. v2.set(val);
  45. context.write(k2,v2);
  46. }
  47. }
  48. }
  49. /**
  50. * 输入
  51. * B [A C D E]
  52. * A [E F D G]
  53. * 结果:
  54. * B A-C-D-E
  55. *
  56. */
  57. class QQReducer01 extends Reducer<Text,Text,Text,Text>{
  58. private Text v3;
  59. @Override
  60. protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  61. v3=new Text();
  62. }
  63. @Override
  64. protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  65. StringBuffer sb =new StringBuffer();
  66. for (Text s:values) {
  67. sb.append(s.toString()).append("-");
  68. }
  69. v3.set(sb.toString().substring(0,sb.length()-1));
  70. context.write(key,v3);
  71. }
  72. }
  73. public class QQDriver01 {
  74. public static void main(String[] args) throws Exception {
  75. Configuration conf = new Configuration();
  76. conf.set("fs.defaultFS", "file:///");
  77. conf.set("mapreduce.framework.name","local");
  78. Job job = Job.getInstance(conf,"QQ第一次MR");
  79. job.setMapperClass(QQMapper01.class);
  80. job.setReducerClass(QQReducer01.class);
  81. job.setJarByClass(QQDriver01.class);
  82. // 设置map和reduce 的输出类型
  83. job.setOutputKeyClass(Text.class);
  84. job.setOutputValueClass(Text.class);
  85. FileInputFormat.setInputPaths(job,new Path(args[0]));
  86. FileOutputFormat.setOutputPath(job,new Path(args[1]));
  87. System.exit(job.waitForCompletion(true)?0:1);
  88. }
  89. }

image.png

  1. import org.apache.hadoop.conf.Configuration;
  2. import org.apache.hadoop.fs.Path;
  3. import org.apache.hadoop.io.LongWritable;
  4. import org.apache.hadoop.io.Text;
  5. import org.apache.hadoop.mapreduce.Job;
  6. import org.apache.hadoop.mapreduce.Mapper;
  7. import org.apache.hadoop.mapreduce.Reducer;
  8. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  9. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  10. import java.io.IOException;
  11. import java.util.Arrays;
  12. /**
  13. * @Author laoyan
  14. * @Description TODO
  15. * @Date 2022/4/17 10:15
  16. * @Version 1.0
  17. */
  18. /**
  19. * A I-K-B-G-F-H-O-C-D-
  20. * I-K A
  21. * I-B A
  22. * .....
  23. *
  24. *
  25. * reduce 的输入:
  26. * I-K [A,B,C]
  27. * 输出
  28. * I-K A-B-C
  29. */
  30. class QQMapper02 extends Mapper<LongWritable, Text,Text,Text>{
  31. private Text v2;
  32. private Text k2;
  33. @Override
  34. protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  35. k2 =new Text();
  36. v2 = new Text();
  37. }
  38. @Override
  39. protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  40. String line = value.toString();
  41. String[] split = line.split("\t");
  42. String k = split[0];
  43. String[] arr = split[1].split("-");
  44. Arrays.sort(arr);
  45. System.out.println(Arrays.toString(arr));
  46. // B C D E F
  47. for (int i=0 ;i< arr.length - 1;i++){
  48. for (int j= i+1 ;j< arr.length ; j++){
  49. k2.set(arr[i]+"-"+arr[j]);
  50. v2.set(k);
  51. context.write(k2,v2);
  52. }
  53. }
  54. }
  55. }
  56. class QQReducer02 extends Reducer<Text,Text,Text,Text>{
  57. private Text v3;
  58. @Override
  59. protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  60. v3=new Text();
  61. }
  62. /**
  63. *
  64. * reduce 的输入:
  65. * * I-K [A,B,C]
  66. * * 输出
  67. * * I-K A-B-C
  68. */
  69. @Override
  70. protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  71. StringBuffer sb =new StringBuffer();
  72. for (Text t:values) {
  73. sb.append(t.toString()).append("-");
  74. }
  75. String v = sb.toString().substring(0,sb.length()-1);
  76. v3.set(v);
  77. context.write(key,v3);
  78. }
  79. }
  80. public class QQDriver02 {
  81. public static void main(String[] args) throws Exception{
  82. Configuration conf = new Configuration();
  83. conf.set("fs.defaultFS", "file:///");
  84. conf.set("mapreduce.framework.name","local");
  85. Job job = Job.getInstance(conf,"QQ第二次MR");
  86. job.setMapperClass(QQMapper02.class);
  87. job.setReducerClass(QQReducer02.class);
  88. job.setJarByClass(QQDriver02.class);
  89. // 设置map和reduce 的输出类型
  90. job.setOutputKeyClass(Text.class);
  91. job.setOutputValueClass(Text.class);
  92. FileInputFormat.setInputPaths(job,new Path(args[0]));
  93. FileOutputFormat.setOutputPath(job,new Path(args[1]));
  94. System.exit(job.waitForCompletion(true)?0:1);
  95. }
  96. }

image.png
测试切割是几个值:

  1. /**
  2. * @Author laoyan
  3. * @Description TODO
  4. * @Date 2022/4/17 10:37
  5. * @Version 1.0
  6. */
  7. public class MainText {
  8. public static void main(String[] args) {
  9. String str = "F-M-L-H-G-D-C-B-A-";
  10. String[] arr = str.split("-");
  11. System.out.println(arr.length);
  12. // 答案是9个
  13. }
  14. }