// Find, for every user id, the row that holds that user's highest score.
// Relies on a SparkSession named `spark` and on `import spark.implicits._`
// being in scope already (needed for `.toDF`), as in the original snippet.
val df = spark.sparkContext.makeRDD(Seq(
  ("1", "张三", "数学", 89),
  ("1", "张三", "英语", 78),
  ("1", "张三", "物理", 92),
  ("2", "李四", "数学", 82),
  ("2", "李四", "英语", 98)
)).toDF("id", "name", "course", "score")

df.rdd
  .keyBy(_.getAs[String]("id"))
  // reduceByKey merges rows pairwise (map-side first), so only one candidate
  // row per key and partition is shuffled; groupBy would ship and buffer
  // every row of a user just to keep a single one.
  .reduceByKey { (best, candidate) =>
    // Strict '>' keeps the row already held when two scores are equal.
    if (candidate.getAs[Int]("score") > best.getAs[Int]("score")) candidate else best
  }
  .values
  // RDD.foreach(println) would print on the executors; collect the (one row
  // per user) result to the driver so the output is visible there.
  .collect()
  .foreach(println)

    找出每个用户（按 id 分组）分数最高的那条记录。