功能说明

在一个(Key,Value)的RDD上调用,Key必须实现Ordered接口(或者作用域内存在该Key类型的隐式Ordering),返回一个按照Key进行排序的(Key,Value)的RDD;默认按升序排序,传入false则按降序排序

案例

根据key升序或者降序排序

  1. import org.apache.spark.rdd.RDD
  2. import org.apache.spark.{SparkConf, SparkContext}
  /** Demonstrates `sortByKey` on a pair RDD keyed by `Int`, in both directions. */
  object demo {

    /**
      * Builds a small (Int, String) RDD, sorts it by key ascending and then
      * descending, and prints each result on its own line.
      *
      * @param args command-line arguments (unused)
      */
    def main(args: Array[String]): Unit = {
      val conf: SparkConf = new SparkConf().setAppName("SparkCoreTest").setMaster("local[*]")
      val sc: SparkContext = new SparkContext(conf)
      // Stop the context in `finally` so it is released even when a job fails.
      try {
        val rdd: RDD[(Int, String)] =
          sc.makeRDD(Array((3, "aa"), (6, "cc"), (2, "bb"), (1, "dd")))

        // Sort the elements by key; the default direction is ascending.
        val newRDD1: RDD[(Int, String)] = rdd.sortByKey()
        println(newRDD1.collect().mkString(",")) // prints: (1,dd),(2,bb),(3,aa),(6,cc)
        println("----------")

        // Descending order.
        val newRDD2: RDD[(Int, String)] = rdd.sortByKey(ascending = false)
        println(newRDD2.collect().mkString(",")) // prints: (6,cc),(3,aa),(2,bb),(1,dd)
      } finally {
        sc.stop()
      }
    }
  }

对自定义对象进行排序:先根据名字升序排序,如果名字相同,再根据年龄降序排序

  1. import org.apache.spark.rdd.RDD
  2. import org.apache.spark.{SparkConf, SparkContext}
  /** Demonstrates `sortByKey` on a pair RDD whose key is the custom `Student` type. */
  object demo {

    /**
      * Builds an RDD keyed by `Student`, sorts it with the ordering that
      * `Student.compare` defines, and prints the result on a single line.
      *
      * @param args command-line arguments (unused)
      */
    def main(args: Array[String]): Unit = {
      val conf: SparkConf = new SparkConf().setAppName("SparkCoreTest").setMaster("local[*]")
      val sc: SparkContext = new SparkContext(conf)
      // Stop the context in `finally` so it is released even when a job fails.
      try {
        // A custom key type must supply an ordering; Student does so by mixing in Ordered.
        val stdList: List[(Student, Int)] = List(
          (new Student("jingjing", 18), 1),
          (new Student("bangzhang", 18), 1),
          (new Student("jingjing", 19), 1),
          (new Student("luoxiang", 18), 1),
          (new Student("jingjing", 20), 1)
        )
        val stdRDD: RDD[(Student, Int)] = sc.makeRDD(stdList)
        val resRDD: RDD[(Student, Int)] = stdRDD.sortByKey()
        println(resRDD.collect().mkString(" -- "))
        /* Output (printed on one line; wrapped here for readability):
           (Student(bangzhang, 18),1)
           -- (Student(jingjing, 20),1)
           -- (Student(jingjing, 19),1)
           -- (Student(jingjing, 18),1)
           -- (Student(luoxiang, 18),1)
         */
      } finally {
        sc.stop()
      }
    }
  }
  /**
    * A student that can be used as a sortable key.
    *
    * Ordering: by `name` ascending; students with the same name are ordered by
    * `age` descending (older first).
    *
    * @param name the student's name, the primary sort field
    * @param age  the student's age, the tie-breaker when names are equal
    */
  class Student(var name: String, var age: Int) extends Ordered[Student] with Serializable {

    /**
      * Compares this student with another according to the class ordering.
      *
      * @param that the student to compare against
      * @return a negative value if this student sorts before `that`, a positive
      *         value if it sorts after, and 0 if both name and age are equal
      */
    override def compare(that: Student): Int = {
      val byName: Int = this.name.compareTo(that.name)
      if (byName != 0) byName
      // Names are equal: fall back to age, descending. Swapping the operands
      // reverses the direction; Integer.compare avoids the overflow that
      // `that.age - this.age` can hit for extreme values.
      else Integer.compare(that.age, this.age)
    }

    /** Renders the student as `Student(name, age)`. */
    override def toString: String = s"Student($name, $age)"
  }