遇到问题

windows本地调试Hadoop程序时报错

描述

java.io.FileNotFoundException: java.io.FileNotFoundException: HADOOP_HOME and hadoop.home.dir are unset

原因

其原因是windows本地缺少Hadoop运行环境:需要下载winutils文件,用其中的bin目录替换hadoop-2.8.4包内的bin目录,并将下载文件中的hadoop.dll复制到C:\Windows\System32下

解决办法

将程序打包后,上传到linux上的hadoop环境中运行;或者按照上述"原因"中的方法在windows本地配置winutils环境并设置HADOOP_HOME。

代码清单

  1. import org.apache.hadoop.conf.Configuration;
  2. import org.apache.hadoop.fs.Path;
  3. import org.apache.hadoop.io.IntWritable;
  4. import org.apache.hadoop.io.Text;
  5. import org.apache.hadoop.mapreduce.Job;
  6. import org.apache.hadoop.mapreduce.Mapper;
  7. import org.apache.hadoop.mapreduce.Reducer;
  8. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  9. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  10. import java.io.IOException;
  11. import java.util.StringTokenizer;
  12. public class WordCount {
  13. //编写TokenizerMapper类继承Mapper类
  14. public static class TokenizerMapper extends Mapper<Object,Text,Text,IntWritable>{
  15. //定义变量one值设置为1,为每个单词定义value为1
  16. public static final IntWritable one=new IntWritable(1);
  17. private Text word=new Text();
  18. //编写map函数,其中输入参数为value(即为单词),输出参数为context
  19. public void map(Object key,Text values,Context context) throws IOException, InterruptedException {
  20. StringTokenizer str=new StringTokenizer(values.toString());
  21. while(str.hasMoreTokens()){
  22. word.set(str.nextToken());
  23. context.write(word,one);
  24. }
  25. }
  26. }
  27. //定义IntSumReducer继承Reducer
  28. public static class IntSumReducer extends Reducer<Text,IntWritable,Text,IntWritable>{
  29. private IntWritable result=new IntWritable();
  30. //定义reduce方法
  31. public void reduce(Text key,Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
  32. //遍历,将统计各个单词的总个数
  33. int sum=0;
  34. for (IntWritable val:values) {
  35. sum+=val.get();
  36. }
  37. result.set(sum);
  38. context.write(key,result);
  39. }
  40. }
  41. //编写主函数
  42. public static void main(String[] args) throws Exception{
  43. Configuration conf=new Configuration();
  44. Job job=Job.getInstance(conf,"wordCount");
  45. job.setJarByClass(WordCount.class);
  46. job.setMapperClass(TokenizerMapper.class);
  47. job.setCombinerClass(IntSumReducer.class);
  48. job.setReducerClass(IntSumReducer.class);
  49. job.setOutputKeyClass(Text.class);
  50. job.setOutputValueClass(IntWritable.class);
  51. //添加文件的输入路径
  52. FileInputFormat.addInputPath(job, new Path(args[0]));
  53. //添加文件的输出路径
  54. FileOutputFormat.setOutputPath(job, new Path(args[1]));
  55. System.exit(job.waitForCompletion(true)?0:1);
  56. }
  57. }