数据
part-r-00000 (1).txt
每行数据中,冒号前面是用户,冒号后面是该用户的好友列表(好友之间用空格分隔)
:::info
需求: 求两两用户之间的共同好友及其个数
思路:
代码开始
第一步: 按照好友进行分组,得到拥有同一个好友的所有用户
public static class FirendsMap extends Mapper<LongWritable, Text, Text, Text>{@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {// 按照冒号分割String[] split = value.toString().split(":");// 对好友数据进行,分割String[] split1 = split[1].split(" ");for (String s : split1) {// 好友, 用户keycontext.write(new Text(s), new Text(split[0]));}}}
public static class FirendsReduce extends Reducer<Text, Text, Text, Text>{@Overrideprotected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {String data = "";// 循环用户 并且拼接for (Text value : values) {data += (value.toString() +"-");}// 去掉最后面的 -data = data.substring(0, data.length()-1);// 包前不包后context.write(new Text(data), key);}}
N-B A
A B
N-A-B C
Z-A-C-B D
C-A F
B G
N-C-Z K
C-N L
N-Z M
Z N
N-Z S
Z W
package mapFirends;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;public class FirendsOne {public static void deleteFile(Path s) throws IOException {FileSystem fileSystem = FileSystem.get(new Configuration());if(fileSystem.exists(s)){fileSystem.delete(s, true);}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();Job job = Job.getInstance(conf, "word count");job.setJarByClass(FirendsOne.class);job.setMapperClass(FirendsOne.FirendsMap.class);// job.setCombinerClass(FirendsOne.FirendsReduce.class);job.setReducerClass(FirendsOne.FirendsReduce.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);Path path = new Path("data/result4");deleteFile(path);FileInputFormat.addInputPath(job,new Path("data/firends.txt"));FileOutputFormat.setOutputPath(job,path);System.exit(job.waitForCompletion(true)?0:1);}public static class FirendsMap extends Mapper<LongWritable, Text, Text, Text>{@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {// 按照冒号分割String[] split = value.toString().split(":");// 对好友数据进行,分割String[] split1 = split[1].split(" ");for (String s : split1) {context.write(new Text(s), new Text(split[0]));}}}public static class FirendsReduce extends Reducer<Text, Text, Text, Text>{@Overrideprotected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {String data = "";for (Text value : values) {data += 
(value.toString() +"-");}data = data.substring(0, data.length()-1);// 包前不包后context.write(new Text(data), key);}}}
第二步: 获取两两用户之间的共同好友(只被一个用户拥有的好友无法组成用户对,因此会被排除)
public static class FirendsMap extends Mapper<LongWritable, Text, Text, Text>{@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {// 按照制表符进行切割String[] split = value.toString().split("\t");// 对用户进行切分String[] split1 = split[0].split("-");Arrays.sort(split1);// 不要最后一个for (int i = 0; i < split1.length-1; i++) {// 跳过第一个for (int i1 = i+1; i1 < split1.length; i1++) {// 拼接两两用户context.write(new Text(split1[i]+"-"+split1[i1]), new Text(split[1]));}}}}
public static class FirendsReduce extends Reducer<Text, Text, Text, Text>{@Overrideprotected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {int count = 0;String result = "";// 拼接共同好友 这里用 \t 拼接// 计数共同好友for (Text value : values) {count +=1;result += value+"\t";}result += count; // result最后一个元素是\t 所以这里直接拼接个数context.write(key, new Text(result));}}
A-B C D 2
A-C D F 2
A-N C 1
A-Z D 1
B-C D 1
B-N C A 2
B-Z D 1
C-N K L 2
C-Z D K 2
N-Z K M S 3
package mapFirends;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;import java.util.Arrays;public class FirendsTwo {public static void deleteFile(Path s) throws IOException {FileSystem fileSystem = FileSystem.get(new Configuration());if(fileSystem.exists(s)){fileSystem.delete(s, true);}}public static void main(String[] args) throws Exception {Configuration conf = new Configuration();Job job = Job.getInstance(conf, "word count");job.setJarByClass(FirendsTwo.class);job.setMapperClass(FirendsTwo.FirendsMap.class);// job.setCombinerClass(FirendsTwo.FirendsReduce.class);job.setReducerClass(FirendsTwo.FirendsReduce.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);Path path = new Path("data/result5");deleteFile(path);FileInputFormat.addInputPath(job,new Path("data/result4"));FileOutputFormat.setOutputPath(job,path);System.exit(job.waitForCompletion(true)?0:1);}public static class FirendsMap extends Mapper<LongWritable, Text, Text, Text>{@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {// 按照制表符进行切割String[] split = value.toString().split("\t");// 对用户进行切分String[] split1 = split[0].split("-");Arrays.sort(split1);// 不要最后一个for (int i = 0; i < split1.length-1; i++) {// 跳过第一个for (int i1 = i+1; i1 < split1.length; i1++) {// 拼接两两用户context.write(new Text(split1[i]+"-"+split1[i1]), new Text(split[1]));}}}}public static class FirendsReduce extends Reducer<Text, Text, Text, Text>{@Overrideprotected void reduce(Text key, Iterable<Text> values, 
Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {int count = 0;String result = "";// 拼接共同好友 这里用 \t 拼接// 计数共同好友for (Text value : values) {count +=1;result += value+"\t";}result += count; // result最后一个元素是\t 所以这里直接拼接个数context.write(key, new Text(result));}}}
总结
- 理解了逻辑之后,代码并不难
- 注意拼接用户对的时候: 外层循环不取最后一个用户,内层循环从外层下标的下一个用户开始(不取第一个),这样每一对用户只会生成一次
