Exam time: 15:30–17:40 in the afternoon, about two hours in total.
During the exam:
For any problem, finding the answer is easy, but thinking it through is hard. Don't be the person who rushes about, busy all the time, yet forgets to think!
I. Short-answer questions (40 points)
1. A 260 MB file is stored on HDFS: how many blocks does it occupy? When it is read with MapReduce, how many input splits does it produce? (A worked sketch follows question 4 below.)
2. Briefly describe the HDFS startup process (not the very first start).
3. Briefly describe the YARN job submission process.
4. Briefly describe what a MapTask does, from reading the input file to producing the MapTask's output file.
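For reference, a minimal sketch of the arithmetic behind question 1, assuming the default 128 MB HDFS block size and the standard FileInputFormat split rule (split size = block size, with a 1.1 slack factor). The class and variable names below are only for illustration:

public class SplitEstimate {
    public static void main(String[] args) {
        long fileSize = 260L * 1024 * 1024;   // the 260 MB file from question 1
        long blockSize = 128L * 1024 * 1024;  // assumed default HDFS block size
        double SPLIT_SLOP = 1.1;              // slack factor used by FileInputFormat

        // HDFS blocks: plain ceiling division -> 128 MB + 128 MB + 4 MB = 3 blocks
        long blocks = (fileSize + blockSize - 1) / blockSize;

        // Input splits: keep cutting full splits while the remainder is more than 1.1 x split size
        long remaining = fileSize;
        int splits = 0;
        while ((double) remaining / blockSize > SPLIT_SLOP) {
            remaining -= blockSize;
            splits++;
        }
        if (remaining > 0) {
            splits++; // the final 132 MB stays as a single split
        }
        System.out.println(blocks + " blocks, " + splits + " splits"); // prints: 3 blocks, 2 splits
    }
}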
II. Programming question (60 points)
1. Find common friends
Below is QQ friend-list data. The part before the colon is a user; the part after the colon lists all of that user's friends (the friend relationships in the data are one-way).
A:B,C,D,F,E,O
B:A,C,E,K
C:A,B,D,E,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
Work out which pairs of people have common friends, and who those common friends are.
Example output format: A-B	C-D-F (the pair A-B has common friends C, D, and F).
This problem needs two MapReduce passes.

Pass 1: split each input line. Take A:B,C,D,F,E,O and B:A,C,E,K. A's friends are B, C, D, F, E, O, which means B is a friend of A, C is a friend of A, and so on. The mapper can therefore emit the data as (friend, user) pairs:
B A
C A
D A
F A
E A
O A
A B
C B
E B
K B
After the shuffle groups by key, the reducer joins the values, for example:
B A
C A-B
D A
F A
E A-B
Over the full data set, records such as F A-B-C also appear. A record like C A-B means C is a common friend of A and B; F A-B-C means F is a common friend of A, B, and C.

Pass 2: its input is the file produced by the first pass. For a record such as D A-B-C, split the line, then split the second field A-B-C again, pair the names two by two, and emit each pair with the common friend as the value:
A-B D
A-C D
B-C D
After grouping by key, a record like A-B D-E-F means the common friends of A and B are D, E, and F.
Package the finished code into a jar, upload it, run it on the YARN platform, take a screenshot of the YARN web UI, and paste the screenshot of the successful run into your document.
Reference answer:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @Author laoyan
 * @Date 2022/4/17 9:53
 * @Version 1.0
 */

/**
 * Mapper for pass 1: for an input line such as A:B,C,D,F,E,O it emits
 * B A
 * C A
 * D A
 * F A
 * E A
 * O A
 * i.e. (friend, user) pairs.
 */
class QQMapper01 extends Mapper<LongWritable, Text, Text, Text> {
    private Text v2;
    private Text k2;

    @Override
    protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        k2 = new Text();
        v2 = new Text();
    }

    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] split = line.split(":");
        String val = split[0];                  // the user
        String[] arr = split[1].split(",");     // B,C,D,F,E,O - the user's friends
        for (String s : arr) {
            k2.set(s);
            v2.set(val);
            context.write(k2, v2);
        }
    }
}

/**
 * Reducer for pass 1.
 * Input:
 * B [A C D E]
 * A [E F D G]
 * Output:
 * B A-C-D-E
 */
class QQReducer01 extends Reducer<Text, Text, Text, Text> {
    private Text v3;

    @Override
    protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        v3 = new Text();
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        StringBuffer sb = new StringBuffer();
        for (Text s : values) {
            sb.append(s.toString()).append("-");
        }
        // drop the trailing "-"
        v3.set(sb.toString().substring(0, sb.length() - 1));
        context.write(key, v3);
    }
}

public class QQDriver01 {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        conf.set("mapreduce.framework.name", "local");

        Job job = Job.getInstance(conf, "QQ first MR");
        job.setMapperClass(QQMapper01.class);
        job.setReducerClass(QQReducer01.class);
        job.setJarByClass(QQDriver01.class);
        // set the output types of map and reduce
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
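Note: the first job writes its result with Hadoop's default TextOutputFormat, which separates key and value with a tab character. That is why QQMapper02 below splits each input line on "\t", and why the second job's input path (args[0]) should point at the output directory of the first job.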

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.Arrays;

/**
 * @Author laoyan
 * @Date 2022/4/17 10:15
 * @Version 1.0
 */

/**
 * Mapper for pass 2: for an input line such as
 * A    I-K-B-G-F-H-O-C-D
 * it emits every pair of names with the common friend as the value:
 * I-K A
 * I-B A
 * .....
 */
class QQMapper02 extends Mapper<LongWritable, Text, Text, Text> {
    private Text v2;
    private Text k2;

    @Override
    protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        k2 = new Text();
        v2 = new Text();
    }

    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        String line = value.toString();
        String[] split = line.split("\t");
        String k = split[0];                 // the common friend
        String[] arr = split[1].split("-");  // the users who share that friend
        Arrays.sort(arr);                    // sort so every pair is always written as e.g. A-B, never B-A
        System.out.println(Arrays.toString(arr)); // e.g. B C D E F
        for (int i = 0; i < arr.length - 1; i++) {
            for (int j = i + 1; j < arr.length; j++) {
                k2.set(arr[i] + "-" + arr[j]);
                v2.set(k);
                context.write(k2, v2);
            }
        }
    }
}

/**
 * Reducer for pass 2.
 * Input:
 * I-K [A,B,C]
 * Output:
 * I-K A-B-C
 */
class QQReducer02 extends Reducer<Text, Text, Text, Text> {
    private Text v3;

    @Override
    protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        v3 = new Text();
    }

    @Override
    protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
        StringBuffer sb = new StringBuffer();
        for (Text t : values) {
            sb.append(t.toString()).append("-");
        }
        String v = sb.toString().substring(0, sb.length() - 1);
        v3.set(v);
        context.write(key, v3);
    }
}

public class QQDriver02 {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        conf.set("mapreduce.framework.name", "local");

        Job job = Job.getInstance(conf, "QQ second MR");
        job.setMapperClass(QQMapper02.class);
        job.setReducerClass(QQReducer02.class);
        job.setJarByClass(QQDriver02.class);
        // set the output types of map and reduce
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
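To meet the packaging requirement above, both drivers would need a small change before running on YARN: they currently hard-code local execution via conf.set("fs.defaultFS", "file:///") and conf.set("mapreduce.framework.name", "local"), so for a cluster run those two lines should be removed (or overridden so that fs.defaultFS points at HDFS and mapreduce.framework.name is "yarn"). A hedged sketch of the submission commands, where the jar name and paths are placeholders:

hadoop jar friends.jar QQDriver01 /exam/friends.txt /exam/output1
hadoop jar friends.jar QQDriver02 /exam/output1 /exam/output2

The second job reads the first job's output directory, and the final result plus the YARN web UI screenshot go into the document as required.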

A quick test of how many values the split produces:
/**
 * @Author laoyan
 * @Date 2022/4/17 10:37
 * @Version 1.0
 */
public class MainText {
    public static void main(String[] args) {
        String str = "F-M-L-H-G-D-C-B-A-";
        String[] arr = str.split("-");
        System.out.println(arr.length); // prints 9
    }
}
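The answer is 9 because String.split(regex) uses a limit of 0, which removes trailing empty strings; the trailing "-" therefore does not produce a tenth, empty element.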
