1.在IDEA里面新建maven工程
    2.在IDEA中编写

    package com.ligong.hello

    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}

    /**
     * Spark word-count job submitted via spark-submit.
     *
     * Expects two arguments:
     *   args(0) — input path (e.g. hdfs://node1:8020/wordcount/input/words.txt)
     *   args(1) — output directory (must not already exist)
     */
    object WordCount {
      def main(args: Array[String]): Unit = {
        if (args.length < 2) {
          println("请指定input和output路径")
          System.exit(1)
        }
        // Set before any HDFS access so the write below runs as "root".
        System.setProperty("HADOOP_USER_NAME", "root")
        // 1. Build the Spark context.
        val conf: SparkConf = new SparkConf().setAppName("WordCount")
        val sc: SparkContext = new SparkContext(conf)
        // 2. Read the input file into an RDD.
        //    BUGFIX: was sc.textFile("") — the input path passed as args(0) was never used.
        val lines: RDD[String] = sc.textFile(args(0))
        // 3. Split each line into words on whitespace.
        //    BUGFIX: was _.split(args(0)), which would split on the input PATH string.
        val words: RDD[String] = lines.flatMap(_.split("\\s+"))
        // 4. Pair each word with an initial count of 1.
        val wordAndOnes: RDD[(String, Int)] = words.map((_, 1))
        // 5. Sum the counts per word.
        val result: RDD[(String, Int)] = wordAndOnes.reduceByKey(_ + _)
        // 6. Collapse to a single partition so the result is one output file, then save.
        result.repartition(1).saveAsTextFile(args(1))
        sc.stop()
      }
    }

    3.打包成jar包
    4.上传到 /export/spark-jar/
    image.png
    5.赋予权限:

    # NOTE(review): 777 is broader than needed — spark-submit only reads the jar,
    # so 644 (or 755) is sufficient and safer.
    1. chmod -R 777 wc.jar

    6.提交运行:

    # Submit WordCount to YARN in cluster mode with one 512m executor.
    # The last two arguments become args(0) (HDFS input file) and
    # args(1) (HDFS output directory — must not already exist).
    1. spark-submit \
    2. --master yarn \
    3. --deploy-mode cluster \
    4. --driver-memory 512m \
    5. --executor-memory 512m \
    6. --num-executors 1 \
    7. --class com.ligong.hello.WordCount \
    8. /export/spark-jar/wc.jar \
    9. hdfs://node1:8020/wordcount/input/words.txt \
    10. hdfs://node1:8020/wordcount/output

    7.
    image.png
    image.png