DataSet to RDD
Call the rdd method on the Dataset.
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

// The case class must be declared at the top level (outside the method)
// so that Spark can derive an implicit encoder for it
case class User(name: String, age: Int)

object DS2RDD {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .master("local[*]")
      .appName("DS2RDD")
      .getOrCreate()
    import spark.implicits._

    val ds = Seq(User("lisi", 40), User("zs", 20)).toDS()
    // rdd converts the typed Dataset back into an RDD of the same element type
    val rdd: RDD[User] = ds.rdd
    rdd.collect.foreach(println)
    spark.stop()
  }
}
Output:
User(lisi,40)
User(zs,20)
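Note that rdd also works on an untyped DataFrame (a Dataset[Row]), where it yields an RDD[Row] whose fields must be read back by position or name. A minimal sketch, assuming the spark session and User case class from the example above are in scope:

import org.apache.spark.sql.Row

// A DataFrame is a Dataset[Row], so .rdd yields RDD[Row];
// fields are read back via getAs rather than as typed objects
val df = Seq(User("lisi", 40), User("zs", 20)).toDF()
val rowRdd: RDD[Row] = df.rdd
rowRdd
  .map(row => User(row.getAs[String]("name"), row.getAs[Int]("age")))
  .collect()
  .foreach(println)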
RDD to DataSet
Call the toDS method on the RDD.
import org.apache.spark.sql.{Dataset, SparkSession}

// Declare the entity class at the top level so Spark can derive its encoder
case class User(name: String, age: Int)

object RDD2DS {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession
      .builder()
      .master("local[*]")
      .appName("RDD2DS")
      .getOrCreate()
    import spark.implicits._

    // Create an RDD from a local collection
    val rdd = spark.sparkContext
      .parallelize(Array(User("lisi", 10), User("zs", 20), User("ww", 15)))
    // toDS (provided by spark.implicits._) turns the RDD into a typed Dataset
    val ds: Dataset[User] = rdd.toDS()
    ds.show()
    spark.stop()
  }
}
Output:
+----+---+
|name|age|
+----+---+
|lisi| 10|
| zs| 20|
| ww| 15|
+----+---+
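The implicit toDS conversion comes from spark.implicits._. An equivalent, more explicit route is SparkSession.createDataset, which takes the same encoder implicitly. A minimal sketch, assuming the spark session and User case class from the example above are in scope:

import org.apache.spark.sql.Dataset

// createDataset is the explicit counterpart of the implicit toDS conversion;
// the encoder for User is still supplied by spark.implicits._
val rdd = spark.sparkContext
  .parallelize(Array(User("lisi", 10), User("zs", 20), User("ww", 15)))
val ds: Dataset[User] = spark.createDataset(rdd)
ds.show()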