// Original article: https://blog.csdn.net/dkl12/article/details/81381151
package com.soyuan.zhitu.app
import org.apache.spark.sql.SparkSession
/**
 * Demo application showing how to wrap Scala functions as Spark SQL user-defined
 * functions (UDFs) with `org.apache.spark.sql.functions.udf` and apply them through
 * the DataFrame API (`withColumn` and `select`).
 *
 * @author tangwx@soyuan.com.cn
 * @date 2019/10/30 20:31
 */
object TestApp {

  /**
   * Builds a small in-memory DataFrame of (name, age) rows, defines three UDFs and
   * prints the result of applying them.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // No master URL is configured here; it has to be supplied by the launch
    // configuration (e.g. `spark-submit --master ...`).
    val spark = SparkSession
      .builder()
      .getOrCreate()

    // Always release the session, even when one of the steps below throws.
    try {
      val userData = Array(("Leo", 16), ("Marry", 21), ("Jack", 14), ("Tom", 18))
      val userDF = spark.createDataFrame(userData).toDF("name", "age")
      // userDF.show()
      userDF.createOrReplaceTempView("user")

      // Alternative kept for reference: register the functions by name so that they
      // can be called from SQL text instead of the DataFrame API.
      // spark.udf.register("strLen", (str: String) => str.length)
      // spark.sql("select name,strLen(name) as name_len from user").show()
      // spark.udf.register("isAdult", isAdult _)

      import org.apache.spark.sql.functions._

      // Define a UDF from an anonymous function.
      val strLen = udf((str: String) => str.length())
      // Define a UDF from a named method.
      val udf_isAdult = udf(isAdult _)
      // val udf_isAdult = udf(isAdult(_: Int))

      // Apply the UDFs by appending derived columns one at a time.
      userDF
        .withColumn("name_len", strLen(col("name")))
        .withColumn("isAdult", udf_isAdult(col("age")))
        .show()

      // Apply the same UDFs inside a single projection.
      userDF
        .select(
          col("*"),
          strLen(col("name")) as "name_len1",
          udf_isAdult(col("age")) as "isAdult1"
        )
        .show()

      // Two-argument UDF: sum of both string lengths, or -1 when either input is null.
      val newUdf = udf((str1: String, str2: String) => {
        if (str1 != null && str2 != null) {
          str1.length + str2.length
        } else {
          -1
        }
      })

      // userDF.withColumn("newUdf", newUdf(col("name"), col("age")))
      //   .withColumn("isAdult", udf_isAdult(col("age")))
      //   .show()

      // NOTE(review): `age` is an integer column while `newUdf` takes Strings; this
      // relies on Spark implicitly casting the argument to a string - confirm that
      // this is intended.
      userDF
        .select(
          col("*"),
          newUdf(col("name"), col("age"))
        )
        .show()
    } finally {
      spark.stop()
    }
  }

  /**
   * Tells whether a person of the given age is an adult.
   *
   * @param age age in years
   * @return `true` when `age` is 18 or more (adult), `false` otherwise (minor)
   */
  def isAdult(age: Int): Boolean = age >= 18
}