// Original article (原文链接): https://blog.csdn.net/dkl12/article/details/81381151
package com.soyuan.zhitu.app

import org.apache.spark.sql.SparkSession

/**
 * Demonstrates defining Spark SQL user-defined functions (UDFs) with
 * `org.apache.spark.sql.functions.udf` and applying them to a small
 * in-memory DataFrame of (name, age) rows.
 *
 * @author tangwx@soyuan.com.cn
 * @date 2019/10/30 20:31
 */
object TestApp {

  /**
   * Builds a four-row user DataFrame, registers it as the temp view `user`,
   * and prints three result tables produced by applying UDFs through
   * `withColumn` and `select`.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().getOrCreate()
    try {
      val userData = Array(("Leo", 16), ("Marry", 21), ("Jack", 14), ("Tom", 18))
      val userDF = spark.createDataFrame(userData).toDF("name", "age")
      // userDF.show()
      userDF.createOrReplaceTempView("user")

      // Alternative: register the functions by name so they can be called from SQL text.
      // spark.udf.register("strLen", (str: String) => str.length)
      // spark.sql("select name,strLen(name) as name_len from user").show()
      // spark.udf.register("isAdult", isAdult _)

      import org.apache.spark.sql.functions._

      // Define a UDF from an anonymous function.
      val strLen = udf((str: String) => str.length())
      // Define a UDF from a named method (eta-expanded into a function value).
      val isAdultUdf = udf(isAdult _)
      // val isAdultUdf = udf(isAdult(_: Int))

      // Append the UDF results as extra columns.
      userDF
        .withColumn("name_len", strLen(col("name")))
        .withColumn("isAdult", isAdultUdf(col("age")))
        .show()

      // Same UDFs, applied through select with explicit aliases.
      userDF
        .select(
          col("*"),
          strLen(col("name")).as("name_len1"),
          isAdultUdf(col("age")).as("isAdult1")
        )
        .show()

      // Two-argument UDF: sum of both string lengths, or -1 when either input is null.
      val newUdf = udf((str1: String, str2: String) =>
        if (str1 != null && str2 != null) str1.length + str2.length else -1
      )
      // userDF.withColumn("newUdf", newUdf(col("name"), col("age")))
      //   .withColumn("isAdult", isAdultUdf(col("age")))
      //   .show()

      // NOTE(review): "age" is an integer column passed to a String parameter;
      // presumably this relies on Spark casting the argument implicitly — confirm.
      userDF.select(col("*"), newUdf(col("name"), col("age"))).show()
    } finally {
      // Release the session even if one of the jobs above fails.
      spark.stop()
    }
  }

  /**
   * Tells whether the given age counts as adult.
   *
   * @param age age in years
   * @return `true` when `age` is 18 or more, `false` otherwise
   */
  def isAdult(age: Int): Boolean = age >= 18
}
