代码

demo01 演示由样例类（case class）对象组成的 RDD 转换为 DataFrame；demo02 演示由基本类型元组组成的 RDD 转换为 DataFrame。

  1. package com
  2. import org.apache.spark.SparkContext
  3. import org.apache.spark.rdd.RDD
  4. import org.apache.spark.sql.SparkSession
  /**
    * A simple user record; `RDD2DF.demo01` builds an RDD of these and converts it to a DataFrame.
    *
    * Declared `final` because case classes are not meant to be extended.
    *
    * @param name the user's name
    * @param age  the user's age
    */
  final case class User(name: String, age: Int)
  /** Small demos that convert an RDD into a DataFrame with `toDF` and print the result. */
  object RDD2DF {

    /**
      * Builds (or reuses) a SparkSession with master `local[2]` and app name `RDD2DF`,
      * passes it to `body`, and stops the session afterwards.
      *
      * The `finally` guarantees `stop()` runs even when `body` throws, so a failing
      * demo does not leave the session running.
      *
      * @param body the work to perform with the session
      */
    private def withLocalSession(body: SparkSession => Unit): Unit = {
      val spark: SparkSession = SparkSession.builder()
        .master("local[2]")
        .appName("RDD2DF")
        .getOrCreate()
      try body(spark)
      finally spark.stop()
    }

    /** Converts an RDD of `User` case-class instances to a DataFrame and prints it. */
    def demo01(): Unit = withLocalSession { spark =>
      // Brings the RDD-to-DataFrame conversions (`toDF`) into scope.
      import spark.implicits._

      val sc: SparkContext = spark.sparkContext
      val users: RDD[User] =
        sc.parallelize(Seq(User("lisi", 10), User("zs", 20), User("ww", 15)))
      users.toDF("name", "age").show()
    }

    /** Converts an RDD of `(String, Int)` tuples to a DataFrame and prints it. */
    def demo02(): Unit = withLocalSession { spark =>
      // Brings the RDD-to-DataFrame conversions (`toDF`) into scope.
      import spark.implicits._

      val sc: SparkContext = spark.sparkContext
      val pairs: RDD[(String, Int)] = sc.parallelize(("lisi", 10) :: ("zs", 20) :: Nil)
      // The second tuple element is the age, so it gets its own column name; giving
      // both columns the same name would make later references to it ambiguous.
      pairs.toDF("name", "age").show()
    }

    /** Entry point: runs the case-class demo; the tuple demo can be enabled by hand. */
    def main(args: Array[String]): Unit = {
      demo01() // RDD of case-class objects
      // demo02() // RDD of basic-type tuples
    }
  }

demo01输出

  1. +----+---+
  2. |name|age|
  3. +----+---+
  4. |lisi| 10|
  5. |  zs| 20|
  6. |  ww| 15|
  7. +----+---+

demo02 输出（该输出对应 toDF("name", "name") 的写法，因此两列表头都是 name；第二列实际是年龄，写成 toDF("name", "age") 时表头为 |name|age|）

  1. +----+----+
  2. |name|name|
  3. +----+----+
  4. |lisi|  10|
  5. |  zs|  20|
  6. +----+----+