import scala.collection.JavaConverters._
import org.apache.commons.pool2.impl.GenericObjectPoolConfig
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
import redis.clients.jedis.{Jedis, JedisPool}
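// Note: in Jedis 3.x and later this class lives at redis.clients.jedis.util.Pool.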
import redis.clients.util.Pool
import scala.collection.mutable.ArrayBuffer
/**
 * Created: 2022-02-23
 * Author: xiaotao
 * Data flow: Hive to Redis
 * Requirement: "Good Books Explained" column page (phase 2).
 * In the column page's category section, the server returns a list of 10
 * book-explanation albums, sorted in descending order by each album's play
 * count over the last 14 days.
 */
object GoodBookSpeakWellDateToRedis {
  private[this] var jedisPool: Pool[Jedis] = _
  /**
   * Create the Redis connection pool.
   * @param host     Redis host
   * @param port     Redis port
   * @param timeout  connection timeout in milliseconds
   * @param password Redis password
   */
  def init(host: String, port: Int, timeout: Int, password: String): Unit = {
    jedisPool = new JedisPool(new GenericObjectPoolConfig, host, port, timeout, password)
  }
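  // The default GenericObjectPoolConfig caps the pool at 8 connections (maxTotal
  // and maxIdle both default to 8), which is ample here because all Redis writes
  // happen serially on the driver. If tuning were ever needed it could look like
  // this (a sketch; the values are illustrative, not from the original job):
  //   val poolConfig = new GenericObjectPoolConfig
  //   poolConfig.setMaxTotal(16)
  //   poolConfig.setMaxIdle(8)
  //   jedisPool = new JedisPool(poolConfig, host, port, timeout, password)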
  /**
   * Add an entry to the sorted set; if the member already exists, its score is updated.
   * @param key                zset key
   * @param story_14d_play_cts score (play count within the last 14 days)
   * @param story_id           member (album id)
   */
  def zadd(key: String, story_14d_play_cts: Int, story_id: String): Unit = {
    val jedis = jedisPool.getResource
    // Return the connection to the pool even if the write fails.
    try jedis.zadd(key, story_14d_play_cts, story_id)
    finally jedis.close()
  }
  /**
   * Remove the given member from the sorted set.
   */
  def zrem(key: String, member: String): Unit = {
    val jedis = jedisPool.getResource
    try jedis.zrem(key, member)
    finally jedis.close()
  }
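  // Once populated, the serving side can read the ranked list straight from the
  // sorted set, e.g. with the raw command (a sketch; the actual server-side read
  // path is not part of this job):
  //   ZREVRANGE EXPLAIN_BOOK_ZS 0 9 WITHSCORES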
  // Compute today's ranking from Hive and sync it into Redis.
  def getResultToRedis(spark: SparkSession): Unit = {
    val resultDataNew: DataFrame = spark.sql(
      """
        |select story_id
        |      ,cast(story_14d_play_cts as int) as story_14d_play_cts
        |from ads.ads_ks_log_story_play_hsjj_14h_sum_a_d
        |order by story_14d_play_cts desc
        |limit 10 -- the page shows exactly 10 albums
        |""".stripMargin)
    resultDataNew.show(20)
    val mapRDD: RDD[(String, Int)] = resultDataNew.rdd.map(row => (row.getString(0), row.getInt(1)))
    // Today's top-10 rows: (album_id, 14-day play count). The result is tiny,
    // so collecting it to the driver is safe.
    val nowDate: Array[(String, Int)] = mapRDD.collect()
    val jedis = jedisPool.getResource
    // Yesterday's members currently stored in Redis (the full-range zrevrange
    // returns every member, highest score first).
    val beforeDate: Set[String] =
      try jedis.zrevrange("EXPLAIN_BOOK_ZS", 0, -1).asScala.toSet
      finally jedis.close()
    // album_ids present both yesterday and today
    val sameDate = new ArrayBuffer[String]
    // today's album_ids that were not in Redis yesterday
    val nowNoSameDate = new ArrayBuffer[(String, Int)]
    // yesterday's album_ids that dropped out of today's top list
    val beforeNoSameDate = new ArrayBuffer[String]
    for (elem <- nowDate) {
      if (beforeDate.contains(elem._1)) {
        sameDate += elem._1
        // The member survived the day: just refresh its score with today's count.
        zadd("EXPLAIN_BOOK_ZS", elem._2, elem._1)
      } else {
        nowNoSameDate += elem
      }
    }
    // Collect yesterday's members that fell out of today's list. The original two
    // branches (partial overlap vs. no overlap) collapse into a single filter.
    if (sameDate.length < 10) {
      for (elem <- beforeDate if !sameDate.contains(elem)) {
        beforeNoSameDate += elem
      }
    }
    if (sameDate.length != 10) {
      // Swap out stale members one for one: remove one of yesterday's leftovers,
      // add one of today's new entries. Both buffers have the same length when
      // each side holds exactly 10 albums; min() guards against a short day.
      val swaps = math.min(nowNoSameDate.length, beforeNoSameDate.length)
      var i: Int = 0
      while (i < swaps) {
        zrem("EXPLAIN_BOOK_ZS", beforeNoSameDate(i))
        zadd("EXPLAIN_BOOK_ZS", nowNoSameDate(i)._2, nowNoSameDate(i)._1)
        i += 1
      }
    }
  }
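  // The member-by-member diff above never leaves the key empty, but if a brief
  // empty window were acceptable, the whole dance could be replaced by a
  // pipelined full rewrite. A minimal sketch (not part of the original job):
  def fullRefresh(nowDate: Array[(String, Int)]): Unit = {
    val jedis = jedisPool.getResource
    try {
      val pipeline = jedis.pipelined()
      pipeline.del("EXPLAIN_BOOK_ZS")
      for ((id, cts) <- nowDate) pipeline.zadd("EXPLAIN_BOOK_ZS", cts, id)
      pipeline.sync() // flush all queued commands in one round trip
    } finally jedis.close()
  }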
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .appName("GoodBookSpeakWellDateToRedis")
      // .master("local[*]")
      .config("spark.driver.allowMultipleContexts", true)
      .config("hive.exec.dynamic.partition.mode", "nonstrict")
      .enableHiveSupport()
      .getOrCreate()
    spark.sparkContext.setLogLevel("ERROR")
    // Connection details are blanked out in the original post; fill in for your environment.
    val password = ""
    val host = ""
    val port = 6379
    val timeout = 1000
    init(host, port, timeout, password)
    // Write today's ranking into Redis.
    getResultToRedis(spark)
    // Release the pooled connections before shutting down.
    jedisPool.destroy()
    spark.stop()
  }
}
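// Submission sketch (the jar name and cluster settings are assumptions; adjust
// to your environment):
//   spark-submit --class GoodBookSpeakWellDateToRedis \
//     --master yarn --deploy-mode cluster \
//     good-book-speak-well.jar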