数据

part-r-00000 (1).txt
冒号前面代表用户，后面代表该用户的好友。

:::info
需求：求两两用户之间的共同好友及其个数。

思路：见下方流程图（image.png）。
:::

代码开始

按照好友进行分组 得到同一个好友下面的用户

  1. public static class FirendsMap extends Mapper<LongWritable, Text, Text, Text>{
  2. @Override
  3. protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  4. // 按照冒号分割
  5. String[] split = value.toString().split(":");
  6. // 对好友数据进行,分割
  7. String[] split1 = split[1].split(" ");
  8. for (String s : split1) {
  9. // 好友, 用户key
  10. context.write(new Text(s), new Text(split[0]));
  11. }
  12. }
  13. }
  1. public static class FirendsReduce extends Reducer<Text, Text, Text, Text>{
  2. @Override
  3. protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  4. String data = "";
  5. // 循环用户 并且拼接
  6. for (Text value : values) {
  7. data += (value.toString() +"-");
  8. }
  9. // 去掉最后面的 -
  10. data = data.substring(0, data.length()-1);// 包前不包后
  11. context.write(new Text(data), key);
  12. }
  13. }
  1. N-B A
  2. A B
  3. N-A-B C
  4. Z-A-C-B D
  5. C-A F
  6. B G
  7. N-C-Z K
  8. C-N L
  9. N-Z M
  10. Z N
  11. N-Z S
  12. Z W
  1. package mapFirends;
  2. import org.apache.hadoop.conf.Configuration;
  3. import org.apache.hadoop.fs.FileSystem;
  4. import org.apache.hadoop.fs.Path;
  5. import org.apache.hadoop.io.LongWritable;
  6. import org.apache.hadoop.io.Text;
  7. import org.apache.hadoop.mapreduce.Job;
  8. import org.apache.hadoop.mapreduce.Mapper;
  9. import org.apache.hadoop.mapreduce.Reducer;
  10. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  11. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  12. import java.io.IOException;
  13. public class FirendsOne {
  14. public static void deleteFile(Path s) throws IOException {
  15. FileSystem fileSystem = FileSystem.get(new Configuration());
  16. if(fileSystem.exists(s)){
  17. fileSystem.delete(s, true);
  18. }
  19. }
  20. public static void main(String[] args) throws Exception {
  21. Configuration conf = new Configuration();
  22. Job job = Job.getInstance(conf, "word count");
  23. job.setJarByClass(FirendsOne.class);
  24. job.setMapperClass(FirendsOne.FirendsMap.class);
  25. // job.setCombinerClass(FirendsOne.FirendsReduce.class);
  26. job.setReducerClass(FirendsOne.FirendsReduce.class);
  27. job.setOutputKeyClass(Text.class);
  28. job.setOutputValueClass(Text.class);
  29. Path path = new Path("data/result4");
  30. deleteFile(path);
  31. FileInputFormat.addInputPath(job,new Path("data/firends.txt"));
  32. FileOutputFormat.setOutputPath(job,path);
  33. System.exit(job.waitForCompletion(true)?0:1);
  34. }
  35. public static class FirendsMap extends Mapper<LongWritable, Text, Text, Text>{
  36. @Override
  37. protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  38. // 按照冒号分割
  39. String[] split = value.toString().split(":");
  40. // 对好友数据进行,分割
  41. String[] split1 = split[1].split(" ");
  42. for (String s : split1) {
  43. context.write(new Text(s), new Text(split[0]));
  44. }
  45. }
  46. }
  47. public static class FirendsReduce extends Reducer<Text, Text, Text, Text>{
  48. @Override
  49. protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  50. String data = "";
  51. for (Text value : values) {
  52. data += (value.toString() +"-");
  53. }
  54. data = data.substring(0, data.length()-1);// 包前不包后
  55. context.write(new Text(data), key);
  56. }
  57. }
  58. }

获取两两用户之间的共同好友（注意：某个好友只被一个用户拥有时无法两两配对，这类数据会被自动排除）

  1. public static class FirendsMap extends Mapper<LongWritable, Text, Text, Text>{
  2. @Override
  3. protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  4. // 按照制表符进行切割
  5. String[] split = value.toString().split("\t");
  6. // 对用户进行切分
  7. String[] split1 = split[0].split("-");
  8. Arrays.sort(split1);
  9. // 不要最后一个
  10. for (int i = 0; i < split1.length-1; i++) {
  11. // 跳过第一个
  12. for (int i1 = i+1; i1 < split1.length; i1++) {
  13. // 拼接两两用户
  14. context.write(new Text(split1[i]+"-"+split1[i1]), new Text(split[1]));
  15. }
  16. }
  17. }
  18. }
  1. public static class FirendsReduce extends Reducer<Text, Text, Text, Text>{
  2. @Override
  3. protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  4. int count = 0;
  5. String result = "";
  6. // 拼接共同好友 这里用 \t 拼接
  7. // 计数共同好友
  8. for (Text value : values) {
  9. count +=1;
  10. result += value+"\t";
  11. }
  12. result += count; // result最后一个元素是\t 所以这里直接拼接个数
  13. context.write(key, new Text(result));
  14. }
  15. }
  1. A-B C D 2
  2. A-C D F 2
  3. A-N C 1
  4. A-Z D 1
  5. B-C D 1
  6. B-N C A 2
  7. B-Z D 1
  8. C-N K L 2
  9. C-Z D K 2
  10. N-Z K M S 3
  1. package mapFirends;
  2. import org.apache.hadoop.conf.Configuration;
  3. import org.apache.hadoop.fs.FileSystem;
  4. import org.apache.hadoop.fs.Path;
  5. import org.apache.hadoop.io.LongWritable;
  6. import org.apache.hadoop.io.Text;
  7. import org.apache.hadoop.mapreduce.Job;
  8. import org.apache.hadoop.mapreduce.Mapper;
  9. import org.apache.hadoop.mapreduce.Reducer;
  10. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  11. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  12. import java.io.IOException;
  13. import java.util.Arrays;
  14. public class FirendsTwo {
  15. public static void deleteFile(Path s) throws IOException {
  16. FileSystem fileSystem = FileSystem.get(new Configuration());
  17. if(fileSystem.exists(s)){
  18. fileSystem.delete(s, true);
  19. }
  20. }
  21. public static void main(String[] args) throws Exception {
  22. Configuration conf = new Configuration();
  23. Job job = Job.getInstance(conf, "word count");
  24. job.setJarByClass(FirendsTwo.class);
  25. job.setMapperClass(FirendsTwo.FirendsMap.class);
  26. // job.setCombinerClass(FirendsTwo.FirendsReduce.class);
  27. job.setReducerClass(FirendsTwo.FirendsReduce.class);
  28. job.setOutputKeyClass(Text.class);
  29. job.setOutputValueClass(Text.class);
  30. Path path = new Path("data/result5");
  31. deleteFile(path);
  32. FileInputFormat.addInputPath(job,new Path("data/result4"));
  33. FileOutputFormat.setOutputPath(job,path);
  34. System.exit(job.waitForCompletion(true)?0:1);
  35. }
  36. public static class FirendsMap extends Mapper<LongWritable, Text, Text, Text>{
  37. @Override
  38. protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  39. // 按照制表符进行切割
  40. String[] split = value.toString().split("\t");
  41. // 对用户进行切分
  42. String[] split1 = split[0].split("-");
  43. Arrays.sort(split1);
  44. // 不要最后一个
  45. for (int i = 0; i < split1.length-1; i++) {
  46. // 跳过第一个
  47. for (int i1 = i+1; i1 < split1.length; i1++) {
  48. // 拼接两两用户
  49. context.write(new Text(split1[i]+"-"+split1[i1]), new Text(split[1]));
  50. }
  51. }
  52. }
  53. }
  54. public static class FirendsReduce extends Reducer<Text, Text, Text, Text>{
  55. @Override
  56. protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
  57. int count = 0;
  58. String result = "";
  59. // 拼接共同好友 这里用 \t 拼接
  60. // 计数共同好友
  61. for (Text value : values) {
  62. count +=1;
  63. result += value+"\t";
  64. }
  65. result += count; // result最后一个元素是\t 所以这里直接拼接个数
  66. context.write(key, new Text(result));
  67. }
  68. }
  69. }

总结

  1. 理解逻辑 代码不难
  2. 注意两两拼接用户时：外层循环不取最后一个用户，内层循环从外层下标的后一位开始（即跳过第一个），这样既不会漏掉配对，也不会产生重复配对。