Converting a DataSet to an RDD

Simply call the rdd method on the Dataset:

    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.SparkSession

    // case class describing the rows of the Dataset
    case class User(name: String, age: Int)

    object DS2RDD {
      def main(args: Array[String]): Unit = {
        val spark: SparkSession = SparkSession
          .builder()
          .master("local[*]")
          .appName("DS2RDD")
          .getOrCreate()
        import spark.implicits._

        // build a typed Dataset from a local collection
        val ds = Seq(User("lisi", 40), User("zs", 20)).toDS

        // .rdd converts the Dataset into an RDD of the same element type
        val rdd: RDD[User] = ds.rdd
        rdd.collect.foreach(println)

        spark.stop()
      }
    }

Output:

    User(lisi,40)
    User(zs,20)
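
The same rdd call works on an untyped DataFrame as well: a DataFrame is just Dataset[Row], so the result is an RDD[Row] whose fields are accessed by position or name. A minimal sketch, assuming the SparkSession spark and the User case class from the example above:

    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.Row
    import spark.implicits._

    val df = Seq(User("lisi", 40), User("zs", 20)).toDF()

    // a DataFrame is Dataset[Row], so .rdd yields RDD[Row]
    val rowRdd: RDD[Row] = df.rdd
    rowRdd.collect.foreach(row => println(row.getAs[String]("name")))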

Converting an RDD to a DataSet

Call the toDS method:

    import org.apache.spark.sql.{Dataset, SparkSession}

    // declare a case class for the rows
    case class User(name: String, age: Int)

    object RDD2DS {
      def main(args: Array[String]): Unit = {
        val spark: SparkSession = SparkSession
          .builder()
          .master("local[*]")
          .appName("RDD2DS")
          .getOrCreate()
        import spark.implicits._

        // create an RDD from a local collection
        val rdd = spark.sparkContext
          .parallelize(Array(User("lisi", 10), User("zs", 20), User("ww", 15)))

        // toDS turns the RDD into a typed Dataset (requires an implicit Encoder)
        val ds: Dataset[User] = rdd.toDS()
        ds.show()

        spark.close()
      }
    }

Output:

    +----+---+
    |name|age|
    +----+---+
    |lisi| 10|
    |  zs| 20|
    |  ww| 15|
    +----+---+
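
toDS is not limited to case classes: any element type with an implicit Encoder in scope works, including tuples and primitives. A small sketch under the same assumptions (an existing SparkSession spark); tuple fields get the default column names _1 and _2:

    import spark.implicits._

    val tupleRdd = spark.sparkContext.parallelize(Seq(("lisi", 10), ("zs", 20)))

    // tuple columns are named _1 and _2 in the resulting Dataset
    val tupleDs = tupleRdd.toDS()
    tupleDs.show()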