Low-level WordCount

  package tcode.chapter07

  import scala.io.Source

  object $17_WordCountLow {
    def main(args: Array[String]): Unit = {
      // 1. Read the file
      val datas = Source.fromFile("datas/wc2.txt", "utf-8").getLines().toList
      // List(hello hadoop flume kafka, kafka spark scala hadoop, hello java python hadoop, kafka flume spark spark, hello flume scala java)

      // 2. Split and flatten
      val words = datas.flatMap(line => line.split(" "))
      // List(hello, hadoop, flume, kafka, kafka, spark, ...)

      // 3. Group by word
      val groupedMap = words.groupBy(x => x)
      // Map(
      //   hello -> List(hello, hello, hello, ...),
      //   ...
      // )

      // 4. Count the occurrences
      val result = groupedMap.map(x => {
        // x = hello -> List(hello, hello, hello, ...)
        (x._1, x._2.size)
      })
      // Map((word, total count), (word, total count), ...)

      result.foreach(x => println(x))
      println("-" * 100)

      // The same pipeline as a single chain
      Source.fromFile("datas/wc.txt", "utf-8")
        .getLines()
        .toList
        .flatMap(_.split(" "))
        .groupBy(x => x)
        .map(x => (x._1, x._2.size))
        .foreach(println(_))
    }
  }
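
On Scala 2.13+, the group-and-count steps above collapse into a single `groupMapReduce` call. A minimal sketch, assuming a 2.13+ standard library; the object name is illustrative and an inline word list stands in for the data file:

  package tcode.chapter07

  object WordCountGroupMapReduce {
    def main(args: Array[String]): Unit = {
      // Hypothetical inline stand-in for datas/wc2.txt
      val datas = List(
        "hello hadoop flume kafka",
        "kafka spark scala hadoop",
        "hello java python hadoop"
      )
      // groupMapReduce (Scala 2.13+): key by the word itself,
      // map each occurrence to 1, then reduce the 1s by addition
      val result = datas.flatMap(_.split(" ")).groupMapReduce(x => x)(_ => 1)(_ + _)
      result.foreach(println)
      // e.g. (hello,2), (hadoop,3), (kafka,2), ...
    }
  }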

High-level WordCount

  package tcode.chapter07

  object $18_WordCountHight {
    def main(args: Array[String]): Unit = {
      val tupleList = List(("Hello Scala Spark World", 4), ("Hello Scala Spark", 3), ("Hello Scala", 2), ("Hello", 1))

      // 1. Split and flatten, assigning each word its sentence's count
      val words = tupleList.flatMap(x => {
        // x = ("Hello Scala Spark World", 4)
        // Split the sentence into words
        val arr = x._1.split(" ")
        // Array(Hello, Scala, Spark, World)
        val tu = arr.map(y => {
          // y = Hello
          (y, x._2)
        })
        tu
      })
      // List((Hello,4), (Scala,4), (Spark,4), (World,4), (Hello,3), (Scala,3), (Spark,3), (Hello,2), (Scala,2), (Hello,1))

      // 2. Group by word
      val groupedMap = words.groupBy(x => x._1)
      // Map(
      //   Hello -> List((Hello,4), (Hello,3), (Hello,2), (Hello,1)),
      //   ...
      // )

      // 3. Sum the counts per word
      val result = groupedMap.map(x => {
        // x = Hello -> List((Hello,4), (Hello,3), (Hello,2), (Hello,1))
        // Equivalent reduce-based version:
        // x._2.reduce((agg, curr) => (agg._1, agg._2 + curr._2))
        val r = x._2.map(y => y._2).sum
        (x._1, r)
      })

      // 4. Print the result
      result.foreach(x => println(x))
      // Map((Hello,10), (Scala,9), (Spark,7), (World,4))
    }
  }
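
The same `groupMapReduce` shortcut applies to the weighted variant: key by the word, map each pair to its count, and reduce by addition. A minimal sketch, again assuming Scala 2.13+ (the object name is illustrative):

  package tcode.chapter07

  object WordCountHighGMR {
    def main(args: Array[String]): Unit = {
      val tupleList = List(("Hello Scala Spark World", 4), ("Hello Scala Spark", 3), ("Hello Scala", 2), ("Hello", 1))
      // Flatten each sentence into (word, weight) pairs, then
      // group by word and sum the weights in one pass (Scala 2.13+)
      val result = tupleList
        .flatMap { case (sentence, n) => sentence.split(" ").map(word => (word, n)) }
        .groupMapReduce(_._1)(_._2)(_ + _)
      result.foreach(println)
      // (Hello,10), (Scala,9), (Spark,7), (World,4)
    }
  }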