DataSet to RDD
Simply call the rdd method:
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession

// Entity class (declared outside main so Spark can derive an encoder for it)
case class User(name: String, age: Int)

object DS2RDD {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("DS2RDD")
      .getOrCreate()
    import spark.implicits._

    val ds = Seq(User("lisi", 40), User("zs", 20)).toDS
    // rdd returns the underlying RDD, preserving the element type
    val rdd: RDD[User] = ds.rdd
    rdd.collect.foreach(println)

    spark.stop()
  }
}
Output:
User(lisi,40)
User(zs,20)
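Because rdd returns a typed RDD[User] rather than an RDD[Row], ordinary RDD transformations apply directly to the result. A minimal sketch continuing the example above (the age threshold and string formatting are hypothetical illustrations, not part of the original):

// The converted RDD behaves like any other RDD of case-class instances.
val adults: RDD[String] = rdd
  .filter(_.age >= 30)                 // keep users aged 30 or older (hypothetical threshold)
  .map(u => s"${u.name} is ${u.age}")
adults.collect.foreach(println)        // prints: lisi is 40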
RDD to DataSet
Call the toDS method:
import org.apache.spark.sql.{Dataset, SparkSession}

// Declare an entity class
case class User(name: String, age: Int)

object RDD2DS {
  def main(args: Array[String]): Unit = {
    val spark: SparkSession = SparkSession.builder()
      .master("local[*]")
      .appName("RDD2DS")
      .getOrCreate()
    import spark.implicits._

    // Create an RDD from a collection
    val rdd = spark.sparkContext.parallelize(
      Array(User("lisi", 10), User("zs", 20), User("ww", 15)))
    // toDS converts the RDD of case-class instances into a typed DataSet
    val ds: Dataset[User] = rdd.toDS()
    ds.show()

    spark.close()
  }
}
Output:
+----+---+
|name|age|
+----+---+
|lisi| 10|
|  zs| 20|
|  ww| 15|
+----+---+
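toDS also works when the RDD holds tuples rather than case-class instances; Spark then names the columns _1, _2, and so on. A minimal sketch, assuming the same SparkSession and implicits import as above:

// Sketch: an RDD of (String, Int) pairs converts the same way.
// The resulting columns are named _1 and _2 by default.
val tupleRdd = spark.sparkContext.parallelize(Seq(("lisi", 10), ("zs", 20)))
val tupleDs: Dataset[(String, Int)] = tupleRdd.toDS()
tupleDs.show()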
