Hive row/column transformation functions
Rows to columns (pivot): case when ... then
Rows to a single column: collect_set / collect_list; the collected column may appear outside the group by fields
Merge multiple columns into one: concat_ws, map(key, value)
Columns to rows: lateral view explode, which splits a complex Hive array or map structure into multiple rows
Explanation: lateral view is used together with UDTFs such as split and explode; it expands one column into multiple rows, and the exploded rows can then be aggregated.
select
    movie,
    category_name
from movie_info
lateral view
    explode(split(category, ',')) movie_info_tmp as category_name;
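A minimal sketch of the data this query expects, using a hypothetical movie_info table with made-up rows for illustration:

-- Hypothetical setup: category holds a comma-separated string of tags
create table movie_info (movie string, category string);
insert into movie_info values
    ('MovieA', 'action,sci-fi'),
    ('MovieB', 'drama');
-- The query above then returns one row per (movie, tag) pair:
-- MovieA  action
-- MovieA  sci-fi
-- MovieB  drama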
SQL test
package data_warehouse.hive

import data_warehouse.hive.utils.ConnectMysql
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

/**
 * Rows to columns (pivot): case when ... then
 * Rows to a single column: collect_set / collect_list; may appear outside the group by fields
 * Merge multiple columns into one: concat_ws, map(key, value)
 * Columns to rows: lateral view explode
 *
 * @author wangjing
 * @create 2022-10-28 11:19
 */
object CollectList {

  def regTable(spark: SparkSession): Unit = {
    // 1. Register the MySQL source tables "collect_set" and "collectList" as temp views
    val regTables = Array("collect_set", "collectList")
    for (table <- regTables) {
      ConnectMysql.getConnection(spark, table)
    }
  }

  def collectList(spark: SparkSession): Unit = {
    regTable(spark)
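    // Assumed layouts (not shown here): "collect_set" is long format (student, subjectName, score);
    // "collectList" is wide format (student, math, english, chinese).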

    // 1. Multiple rows to multiple columns (pivot): case when ... then
    val sql_1 =
      """
        |select student,
        |max(case when subjectName='math' then score else -1 end) as math,
        |max(case when subjectName='english' then score else -1 end) as english,
        |max(case when subjectName='chinese' then score else -1 end) as chinese
        |from collect_set
        |group by student
        |""".stripMargin
    // spark.sql(sql_1).show()
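    // Expected shape: one row per student with math/english/chinese columns; -1 marks a missing subject.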

    // 2. Multiple rows to one column: collect_list or collect_set
    val sql_2 =
      """
        |select student, collect_list(subjectName)
        |from collect_set group by student
        |""".stripMargin
    // spark.sql(sql_2).show()
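    // collect_list keeps duplicate values, collect_set removes them; neither guarantees element order after a shuffle.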

    // 3. Merge columns into one: concat_ws
    val sql_3 =
      """
        |select student, concat_ws(":", subjectName, score) from collect_set
        |""".stripMargin
    // spark.sql(sql_3).show()
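    // Note: Hive's concat_ws only accepts string / array<string> arguments, so a numeric score
    // would need cast(score as string) there; Spark SQL generally coerces it for you.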

    // 4. Columns to rows: lateral view explode
    val sql_4 =
      """
        |select student, subject, score from (
        |  select student, map('math', math, 'english', english, 'chinese', chinese) as scores from collectList
        |) tmp
        |lateral view explode(scores) tmp2 as subject, score
        |""".stripMargin
    // spark.sql(sql_4).show()
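    // explode on a map emits one row per entry with two columns (key, value), aliased here as subject and score.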

    // Save the result
    // val sinktable = "collectList"
    // ConnectMysql.saveMysql(spark, sinktable, result)
  }

  def main(args: Array[String]): Unit = {
    val conf: SparkConf = new SparkConf().setAppName("hql").setMaster("local[*]")
      .set("spark.driver.extraJavaOptions", "-Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf")
      .set("spark.executor.extraJavaOptions", "-Djava.security.auth.login.config=/etc/kafka/kafka_client_jaas.conf")
    System.setProperty("java.security.auth.login.config", "E:\\xinzheng\\code\\0317_table\\src\\main\\resources\\kafka_client_jaas.conf")
    System.setProperty("java.security.krb5.conf", "E:\\xinzheng\\code\\0317_table\\src\\main\\resources\\krb5.conf")
    System.setProperty("sun.security.krb5.debug", "true")
    // SparkSession creates the underlying SparkContext itself, so a separate SparkContext is not needed
    val spark: SparkSession = SparkSession.builder().config(conf).getOrCreate()
    collectList(spark)
    spark.stop()
  }
}