[01].Preparation
- On HDFS
/stuff/dept.txt
/stuff/input
(not needed)
- Local
Three staff data files like these are needed; mine are actually all identical, just to keep things simple.
stuff_1.txt
(the others are the same, or write your own; see the generator sketch after the dept.txt listing below)
103294 11 9033
103295 12 9033
103296 14 9033
103297 13 9033
103298 12 9033
103299 11 9033
103290 13 9033
103291 14 9033
103292 14 9033
103293 10 9033
103284 10 9033
103274 11 9033
103264 12 9033
dept.txt
10 暴龙奥特曼分部
11 迪迦奥特曼分部
12 艾斯奥特曼分部
13 泰罗奥特曼分部
14 赛罗奥特曼分部
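If you would rather generate the staff files than type them by hand, a minimal sketch along these lines could produce files in the same space-separated "id dept salary" format. The class name StaffDataGenerator, the id range, and the salary range are my own assumptions, not part of the assignment:
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Random;

// Hypothetical helper (not part of the assignment): writes one staff file
// in the same space-separated "id dept salary" format used above.
public class StaffDataGenerator {
    public static void main(String[] args) throws IOException {
        Random random = new Random();
        StringBuilder sb = new StringBuilder();
        int startId = 103264;                          // assumed starting employee id
        for (int i = 0; i < 13; i++) {
            int dept = 10 + random.nextInt(5);         // department ids 10..14, matching dept.txt
            int salary = 6000 + random.nextInt(4000);  // assumed salary range 6000..9999
            sb.append(startId + i).append(' ')
              .append(dept).append(' ')
              .append(salary).append('\n');
        }
        Files.write(Paths.get("stuff_1.txt"), sb.toString().getBytes(StandardCharsets.UTF_8));
    }
}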
01. Data provided by the instructor
staff_01.txt
10021456 11 9239
10021457 14 7168
10021458 12 8580
10021459 11 6537
10021460 12 8735
10021461 14 7086
10021462 14 9637
10021463 10 7411
10021464 12 6506
10021465 12 9141
10021466 11 6865
10021467 12 6130
10021468 11 8075
10021469 12 8654
10021470 11 6856
10021471 13 7452
10021472 10 9514
10021473 14 7524
10021474 12 9836
10021475 11 6633
10021476 12 9724
10021477 11 6425
10021478 10 8222
10021479 10 7353
10021480 13 6945
10021481 10 8507
10021482 14 7406
10021483 11 8574
10021484 13 7790
10021485 11 6050
10021486 12 8211
10021487 11 9236
10021488 12 6108
10021489 10 6319
10021490 11 9980
10021491 14 8554
10021492 12 9969
10021493 12 9582
10021494 10 7746
10021495 13 9664
10021496 14 9905
10021497 11 7072
10021498 11 7076
10021499 10 7276
10021500 10 6263
10021501 11 8525
10021502 10 6774
10021503 10 6561
10021504 14 9383
10021505 11 7630
staff_02.txt
10003566 13 7953
10003567 10 7356
10003568 14 9307
10003569 11 6624
10003570 11 7515
10003571 11 7966
10003572 13 7951
10003573 10 8763
10003574 11 8924
10003575 14 8633
10003576 12 6526
10003577 12 6537
10003578 13 7605
10003579 10 8560
10003580 11 7103
10003581 10 7049
10003582 10 7395
10003583 10 9529
10003584 12 6073
10003585 14 9974
10003586 13 9958
10003587 12 6525
10003588 14 7234
10003589 12 7591
10003590 11 7978
10003591 12 8683
10003592 11 9653
10003593 13 8763
10003594 12 6858
10003595 11 9773
10003596 13 8922
10003597 14 6267
10003598 13 8810
10003599 12 7875
10003600 10 8210
10003601 12 9061
10003602 10 7606
10003603 14 7936
10003604 13 9912
10003605 14 9456
10003606 13 7133
10003607 12 7139
10003608 12 8047
10003609 10 6840
10003610 12 8338
10003611 13 7212
10003612 13 6194
10003613 13 9296
10003614 11 6890
10003615 14 6866
staff_03.txt
10014823 12 9139
10014824 14 8002
10014825 14 9919
10014826 10 9976
10014827 12 6790
10014828 11 7669
10014829 12 9763
10014830 11 9105
10014831 12 8688
10014832 13 7595
10014833 13 9710
10014834 14 6655
10014835 13 8521
10014836 12 9620
10014837 14 9129
10014838 13 7387
10014839 12 9356
10014840 12 6740
10014841 13 8308
10014842 10 9272
10014843 14 7584
10014844 12 6767
10014845 14 7184
10014846 10 9998
10014847 12 9744
10014848 10 8675
10014849 12 6929
10014850 10 8096
10014851 12 8619
10014852 11 8189
10014853 14 6053
10014854 10 7049
10014855 14 6184
10014856 14 9899
dept.txt
10 ACCOUNTING
11 RESEARCH
12 SALES
13 OPERATIONS
14 DEVELOPMENT
[02].Project code
01.
StaffMapper
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import cn.aigamejxb.hadoop.hdfs.tools.FSManager;
/*
 * KEYIN: the key the framework provides by default (byte offset of the line start; LongWritable, roughly Java's long)
 *
 * VALUEIN: one line of the input split (Text, roughly Java's String)
 *
 * KEYOUT: the key we emit (the department name, Text)
 *
 * VALUEOUT: the value we emit (that employee's salary, LongWritable)
 */
public class StaffMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    // Cache the contents of dept.txt (dept id -> dept name) in a Map
    private Map<String, String> depts = new HashMap<String, String>();

    @Override
    protected void setup(Mapper<LongWritable, Text, Text, LongWritable>.Context context)
            throws IOException, InterruptedException {
        // Read dept.txt with the helper method wrapped up earlier (FSManager)
        try {
            String content = FSManager.readContent("/stuff/dept.txt");
            String content_lines[] = content.split("\n");
            for (String line : content_lines) {
                if (!line.trim().equals("")) {
                    String items[] = line.split(" ");
                    depts.put(items[0], items[1]);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (URISyntaxException e) {
            e.printStackTrace();
        }
    }

    private Text outKey = new Text();
    private LongWritable longValue = new LongWritable();

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String items[] = value.toString().split(" ");// split the line on spaces to pull out the department and salary
        // items[1]: department id   items[2]: salary
        // The department names come from dept.txt, loaded by overriding setup()
        // (called once before map, suitable for small lookup tables)
        outKey.set(depts.get(items[1]));// look up the department name for this department id
        longValue.set(Long.parseLong(items[2]));// salary
        context.write(outKey, longValue);
    }
}
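FSManager here is the author's own HDFS utility class from an earlier post, so its code is not shown. As a rough sketch of what readContent might look like, assuming it simply reads the whole HDFS file into a UTF-8 string (the NameNode address below is a placeholder, not the author's configuration):
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Hypothetical stand-in for the author's FSManager helper:
// reads the whole HDFS file at the given path and returns it as a UTF-8 string.
public class FSManager {
    public static String readContent(String path) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        // Placeholder NameNode address; replace with your own cluster's fs.defaultFS
        FileSystem fs = FileSystem.get(new URI("hdfs://localhost:9000"), conf);
        try (FSDataInputStream in = fs.open(new Path(path));
             ByteArrayOutputStream out = new ByteArrayOutputStream()) {
            IOUtils.copyBytes(in, out, 4096, false);
            return out.toString("UTF-8");
        }
    }
}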
02.
StaffReduce
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/*
 * KEYIN: reduce input key (department name, Text)
 * VALUEIN: reduce input values (salaries, LongWritable [1,2,3,4,5,6...])
 * KEYOUT: reduce output key (department name, Text)
 * VALUEOUT: average salary (DoubleWritable)
 */
public class StaffReduce extends Reducer<Text, LongWritable, Text, DoubleWritable> {

    private DoubleWritable outValue = new DoubleWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
        long num = 0;// number of employees
        long sum = 0;// total salary
        for (LongWritable value : values) {
            ++num;
            sum += value.get();
        }
        outValue.set(sum / (double) num);// average salary
        context.write(key, outValue);
    }
}
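Note the cast in sum/(double)num: with plain long division the fractional part of the average would be silently dropped. A tiny standalone check of the same averaging logic (the class AverageCheck is just for illustration; the salaries are the first five department-11 rows of staff_01.txt):
import java.util.Arrays;
import java.util.List;

// Standalone check of the averaging logic used in StaffReduce.
public class AverageCheck {
    public static void main(String[] args) {
        // First five salaries of department 11 (RESEARCH) in staff_01.txt
        List<Long> salaries = Arrays.asList(9239L, 6537L, 6865L, 8075L, 6856L);
        long num = 0;
        long sum = 0;
        for (long s : salaries) {
            ++num;
            sum += s;
        }
        System.out.println(sum / num);           // 7514   (long division truncates)
        System.out.println(sum / (double) num);  // 7514.4 (what StaffReduce actually emits)
    }
}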
03.
StaffDriver
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class StaffDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Create the job object
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Set the jar path
        job.setJarByClass(StaffDriver.class);
        // 3. Attach the mapper and reducer
        job.setMapperClass(StaffMapper.class);
        job.setReducerClass(StaffReduce.class);
        // 4. Set the map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        // 5. Set the final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        // 6. Set the input paths (TextInputFormat) and the output path (FileOutputFormat)
        TextInputFormat.addInputPath(job, new Path("D:\\360MoveData\\Users\\AIGameJXB\\Desktop\\Hadoop\\JavaProjectBigData\\src\\cn\\aigamejxb\\hadoop\\mapreduce\\staff\\input\\stuff_1.txt"));
        TextInputFormat.addInputPath(job, new Path("D:\\360MoveData\\Users\\AIGameJXB\\Desktop\\Hadoop\\JavaProjectBigData\\src\\cn\\aigamejxb\\hadoop\\mapreduce\\staff\\input\\stuff_2.txt"));
        TextInputFormat.addInputPath(job, new Path("D:\\360MoveData\\Users\\AIGameJXB\\Desktop\\Hadoop\\JavaProjectBigData\\src\\cn\\aigamejxb\\hadoop\\mapreduce\\staff\\input\\stuff_3.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\360MoveData\\Users\\AIGameJXB\\Desktop\\Hadoop\\JavaProjectBigData\\src\\cn\\aigamejxb\\hadoop\\mapreduce\\staff\\output\\stuff"));
        // 7. Submit the job
        boolean result = job.waitForCompletion(true);// true: print progress to the console
        System.exit(result ? 0 : 1);// 1: abnormal exit
    }
}
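The input and output paths are hard-coded to the author's local Windows directories, which is fine for running inside the IDE against the local file system. If you wanted to submit the same job to a cluster with hadoop jar, a common variant (a sketch under my own class name, not the author's code) is to take the paths from the command line instead:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical variant of StaffDriver that reads paths from the command line,
// e.g.  hadoop jar staff.jar StaffDriverArgs /stuff/input /stuff/output
public class StaffDriverArgs {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(StaffDriverArgs.class);
        job.setMapperClass(StaffMapper.class);
        job.setReducerClass(StaffReduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        for (int i = 0; i < args.length - 1; i++) {
            TextInputFormat.addInputPath(job, new Path(args[i]));              // every argument except the last is an input
        }
        FileOutputFormat.setOutputPath(job, new Path(args[args.length - 1]));  // the last argument is the output directory
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}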
[03].Test run
Screenshot (as you can see, the run succeeded)
- Next, let's take a look at what it output.